rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")
+ * }}}
*
* then `rdd` contains
* {{{
@@ -210,6 +232,84 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
def wholeTextFiles(path: String): JavaPairRDD[String, String] =
new JavaPairRDD(sc.wholeTextFiles(path))
+ /**
+ * Read a directory of binary files from HDFS, a local file system (available on all nodes),
+ * or any Hadoop-supported file system URI as a byte array. Each file is read as a single
+ * record and returned in a key-value pair, where the key is the path of each file,
+ * the value is the content of each file.
+ *
+ * For example, if you have the following files:
+ * {{{
+ * hdfs://a-hdfs-path/part-00000
+ * hdfs://a-hdfs-path/part-00001
+ * ...
+ * hdfs://a-hdfs-path/part-nnnnn
+ * }}}
+ *
+ * Do
+ * `JavaPairRDD<String, PortableDataStream> rdd = sparkContext.binaryFiles("hdfs://a-hdfs-path")`,
+ *
+ * then `rdd` contains
+ * {{{
+ * (a-hdfs-path/part-00000, its content)
+ * (a-hdfs-path/part-00001, its content)
+ * ...
+ * (a-hdfs-path/part-nnnnn, its content)
+ * }}}
+ *
+ * @note Small files are preferred; very large files may cause bad performance.
+ *
+ * @param minPartitions A suggested minimum number of partitions for the input data.
+ */
+ def binaryFiles(path: String, minPartitions: Int): JavaPairRDD[String, PortableDataStream] =
+ new JavaPairRDD(sc.binaryFiles(path, minPartitions))
+
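A minimal Scala sketch of how this might be called (the local master, the four-partition hint, and the input path are illustrative assumptions; `PortableDataStream.toArray()` materializes a file's bytes):
{{{
import org.apache.spark.SparkContext._
import org.apache.spark.{SparkConf, SparkContext}

// Sketch: list every binary file under a (hypothetical) directory with its size in bytes.
val sc = new SparkContext(new SparkConf().setAppName("binaryFilesSketch").setMaster("local[2]"))
val files = sc.binaryFiles("hdfs://a-hdfs-path", minPartitions = 4)
files.mapValues(_.toArray().length)   // (path, PortableDataStream) -> (path, size in bytes)
  .collect()
  .foreach { case (path, size) => println(s"$path -> $size bytes") }
sc.stop()
}}}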
+ /**
+ * :: Experimental ::
+ *
+ * Read a directory of binary files from HDFS, a local file system (available on all nodes),
+ * or any Hadoop-supported file system URI as a byte array. Each file is read as a single
+ * record and returned in a key-value pair, where the key is the path of each file,
+ * the value is the content of each file.
+ *
+ * For example, if you have the following files:
+ * {{{
+ * hdfs://a-hdfs-path/part-00000
+ * hdfs://a-hdfs-path/part-00001
+ * ...
+ * hdfs://a-hdfs-path/part-nnnnn
+ * }}}
+ *
+ * Do
+ * `JavaPairRDD<String, PortableDataStream> rdd = sparkContext.binaryFiles("hdfs://a-hdfs-path")`,
+ *
+ * then `rdd` contains
+ * {{{
+ * (a-hdfs-path/part-00000, its content)
+ * (a-hdfs-path/part-00001, its content)
+ * ...
+ * (a-hdfs-path/part-nnnnn, its content)
+ * }}}
+ *
+ * @note Small files are preferred; very large files may cause bad performance.
+ */
+ @Experimental
+ def binaryFiles(path: String): JavaPairRDD[String, PortableDataStream] =
+ new JavaPairRDD(sc.binaryFiles(path, defaultMinPartitions))
+
+ /**
+ * :: Experimental ::
+ *
+ * Load data from a flat binary file, assuming the length of each record is constant.
+ *
+ * @param path Directory containing the input data files
+ * @param recordLength The fixed length (in bytes) of each record
+ * @return An RDD of data with values, represented as byte arrays
+ */
+ @Experimental
+ def binaryRecords(path: String, recordLength: Int): JavaRDD[Array[Byte]] = {
+ new JavaRDD(sc.binaryRecords(path, recordLength))
+ }
+
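A companion sketch for fixed-length records (the 16-byte record length and the file path are illustrative assumptions):
{{{
import org.apache.spark.{SparkConf, SparkContext}

// Sketch: every record in the (hypothetical) file is exactly 16 bytes long.
val sc = new SparkContext(new SparkConf().setAppName("binaryRecordsSketch").setMaster("local[2]"))
val records = sc.binaryRecords("hdfs://a-hdfs-path/records.bin", recordLength = 16)
println(s"record count: ${records.count()}")   // each element is an Array[Byte] of length 16
sc.stop()
}}}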
/** Get an RDD for a Hadoop SequenceFile with given key and value types.
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
@@ -284,7 +384,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
): JavaPairRDD[K, V] = {
implicit val ctagK: ClassTag[K] = ClassTag(keyClass)
implicit val ctagV: ClassTag[V] = ClassTag(valueClass)
- new JavaPairRDD(sc.hadoopRDD(conf, inputFormatClass, keyClass, valueClass, minPartitions))
+ val rdd = sc.hadoopRDD(conf, inputFormatClass, keyClass, valueClass, minPartitions)
+ new JavaHadoopRDD(rdd.asInstanceOf[HadoopRDD[K, V]])
}
/**
@@ -304,7 +405,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
): JavaPairRDD[K, V] = {
implicit val ctagK: ClassTag[K] = ClassTag(keyClass)
implicit val ctagV: ClassTag[V] = ClassTag(valueClass)
- new JavaPairRDD(sc.hadoopRDD(conf, inputFormatClass, keyClass, valueClass))
+ val rdd = sc.hadoopRDD(conf, inputFormatClass, keyClass, valueClass)
+ new JavaHadoopRDD(rdd.asInstanceOf[HadoopRDD[K, V]])
}
/** Get an RDD for a Hadoop file with an arbitrary InputFormat.
@@ -323,7 +425,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
): JavaPairRDD[K, V] = {
implicit val ctagK: ClassTag[K] = ClassTag(keyClass)
implicit val ctagV: ClassTag[V] = ClassTag(valueClass)
- new JavaPairRDD(sc.hadoopFile(path, inputFormatClass, keyClass, valueClass, minPartitions))
+ val rdd = sc.hadoopFile(path, inputFormatClass, keyClass, valueClass, minPartitions)
+ new JavaHadoopRDD(rdd.asInstanceOf[HadoopRDD[K, V]])
}
/** Get an RDD for a Hadoop file with an arbitrary InputFormat
@@ -341,8 +444,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
): JavaPairRDD[K, V] = {
implicit val ctagK: ClassTag[K] = ClassTag(keyClass)
implicit val ctagV: ClassTag[V] = ClassTag(valueClass)
- new JavaPairRDD(sc.hadoopFile(path,
- inputFormatClass, keyClass, valueClass))
+ val rdd = sc.hadoopFile(path, inputFormatClass, keyClass, valueClass)
+ new JavaHadoopRDD(rdd.asInstanceOf[HadoopRDD[K, V]])
}
/**
@@ -362,7 +465,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
conf: Configuration): JavaPairRDD[K, V] = {
implicit val ctagK: ClassTag[K] = ClassTag(kClass)
implicit val ctagV: ClassTag[V] = ClassTag(vClass)
- new JavaPairRDD(sc.newAPIHadoopFile(path, fClass, kClass, vClass, conf))
+ val rdd = sc.newAPIHadoopFile(path, fClass, kClass, vClass, conf)
+ new JavaNewHadoopRDD(rdd.asInstanceOf[NewHadoopRDD[K, V]])
}
/**
@@ -381,7 +485,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
vClass: Class[V]): JavaPairRDD[K, V] = {
implicit val ctagK: ClassTag[K] = ClassTag(kClass)
implicit val ctagV: ClassTag[V] = ClassTag(vClass)
- new JavaPairRDD(sc.newAPIHadoopRDD(conf, fClass, kClass, vClass))
+ val rdd = sc.newAPIHadoopRDD(conf, fClass, kClass, vClass)
+ new JavaNewHadoopRDD(rdd.asInstanceOf[NewHadoopRDD[K, V]])
}
/** Build the union of two or more RDDs. */
@@ -414,6 +519,16 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
def intAccumulator(initialValue: Int): Accumulator[java.lang.Integer] =
sc.accumulator(initialValue)(IntAccumulatorParam).asInstanceOf[Accumulator[java.lang.Integer]]
+ /**
+ * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
+ * to using the `add` method. Only the master can access the accumulator's `value`.
+ *
+ * This version supports naming the accumulator for display in Spark's web UI.
+ */
+ def intAccumulator(initialValue: Int, name: String): Accumulator[java.lang.Integer] =
+ sc.accumulator(initialValue, name)(IntAccumulatorParam)
+ .asInstanceOf[Accumulator[java.lang.Integer]]
+
/**
* Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
@@ -421,12 +536,31 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
def doubleAccumulator(initialValue: Double): Accumulator[java.lang.Double] =
sc.accumulator(initialValue)(DoubleAccumulatorParam).asInstanceOf[Accumulator[java.lang.Double]]
+ /**
+ * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
+ * to using the `add` method. Only the master can access the accumulator's `value`.
+ *
+ * This version supports naming the accumulator for display in Spark's web UI.
+ */
+ def doubleAccumulator(initialValue: Double, name: String): Accumulator[java.lang.Double] =
+ sc.accumulator(initialValue, name)(DoubleAccumulatorParam)
+ .asInstanceOf[Accumulator[java.lang.Double]]
+
/**
* Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
*/
def accumulator(initialValue: Int): Accumulator[java.lang.Integer] = intAccumulator(initialValue)
+ /**
+ * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values
+ * to using the `add` method. Only the master can access the accumulator's `value`.
+ *
+ * This version supports naming the accumulator for display in Spark's web UI.
+ */
+ def accumulator(initialValue: Int, name: String): Accumulator[java.lang.Integer] =
+ intAccumulator(initialValue, name)
+
/**
* Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
* to using the `add` method. Only the master can access the accumulator's `value`.
@@ -434,6 +568,16 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
def accumulator(initialValue: Double): Accumulator[java.lang.Double] =
doubleAccumulator(initialValue)
+
+ /**
+ * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values
+ * to using the `add` method. Only the master can access the accumulator's `value`.
+ *
+ * This version supports naming the accumulator for display in Spark's web UI.
+ */
+ def accumulator(initialValue: Double, name: String): Accumulator[java.lang.Double] =
+ doubleAccumulator(initialValue, name)
+
/**
* Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add"
* values to using the `add` method. Only the master can access the accumulator's `value`.
@@ -441,6 +585,16 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
def accumulator[T](initialValue: T, accumulatorParam: AccumulatorParam[T]): Accumulator[T] =
sc.accumulator(initialValue)(accumulatorParam)
+ /**
+ * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add"
+ * values to using the `add` method. Only the master can access the accumulator's `value`.
+ *
+ * This version supports naming the accumulator for display in Spark's web UI.
+ */
+ def accumulator[T](initialValue: T, name: String, accumulatorParam: AccumulatorParam[T])
+ : Accumulator[T] =
+ sc.accumulator(initialValue, name)(accumulatorParam)
+
/**
* Create an [[org.apache.spark.Accumulable]] shared variable of the given type, to which tasks
* can "add" values with `add`. Only the master can access the accumuable's `value`.
@@ -448,6 +602,16 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
def accumulable[T, R](initialValue: T, param: AccumulableParam[T, R]): Accumulable[T, R] =
sc.accumulable(initialValue)(param)
+ /**
+ * Create an [[org.apache.spark.Accumulable]] shared variable of the given type, to which tasks
+ * can "add" values with `add`. Only the master can access the accumuable's `value`.
+ *
+ * This version supports naming the accumulator for display in Spark's web UI.
+ */
+ def accumulable[T, R](initialValue: T, name: String, param: AccumulableParam[T, R])
+ : Accumulable[T, R] =
+ sc.accumulable(initialValue, name)(param)
+
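A short Scala sketch of the named variants (the local master and the accumulator name are arbitrary; the name is what the web UI shows on the stage page):
{{{
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext

val jsc = new JavaSparkContext(new SparkConf().setAppName("namedAccumulators").setMaster("local[2]"))
val sum = jsc.accumulator(0.0, "running sum")                   // Accumulator[java.lang.Double]
jsc.sc.parallelize(1 to 100).foreach(i => sum.add(i.toDouble))  // tasks may only add
println(sum.value)                                              // only the driver reads the value
jsc.stop()
}}}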
/**
* Broadcast a read-only variable to the cluster, returning a
* [[org.apache.spark.broadcast.Broadcast]] object for reading it in distributed functions.
@@ -460,6 +624,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
sc.stop()
}
+ override def close(): Unit = stop()
+
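`close()` simply delegates to `stop()`, presumably so the context can be treated as a `java.io.Closeable` (and used with Java's try-with-resources). From Scala, a try/finally sketch gives the same effect:
{{{
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext

val jsc = new JavaSparkContext(new SparkConf().setAppName("closeSketch").setMaster("local[2]"))
try {
  println(jsc.parallelize(java.util.Arrays.asList(1, 2, 3)).count())
} finally {
  jsc.close()   // equivalent to jsc.stop()
}
}}}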
/**
* Get Spark's home location from either a value set through the constructor,
* or the spark.home Java property, or the SPARK_HOME environment variable
@@ -471,7 +637,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
* Add a file to be downloaded with this Spark job on every node.
* The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported
* filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs,
- * use `SparkFiles.get(path)` to find its download location.
+ * use `SparkFiles.get(fileName)` to find its download location.
*/
def addFile(path: String) {
sc.addFile(path)
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala
new file mode 100644
index 0000000000000..3300cad9efbab
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.api.java
+
+import org.apache.spark.{SparkStageInfo, SparkJobInfo, SparkContext}
+
+/**
+ * Low-level status reporting APIs for monitoring job and stage progress.
+ *
+ * These APIs intentionally provide very weak consistency semantics; consumers of these APIs should
+ * be prepared to handle empty / missing information. For example, a job's stage ids may be known
+ * but the status API may not have any information about the details of those stages, so
+ * `getStageInfo` could potentially return `null` for a valid stage id.
+ *
+ * To limit memory usage, these APIs only provide information on recent jobs / stages. These APIs
+ * will provide information for the last `spark.ui.retainedStages` stages and
+ * `spark.ui.retainedJobs` jobs.
+ *
+ * NOTE: this class's constructor should be considered private and may be subject to change.
+ */
+class JavaSparkStatusTracker private[spark] (sc: SparkContext) {
+
+ /**
+ * Return a list of all known jobs in a particular job group. If `jobGroup` is `null`, then
+ * returns all known jobs that are not associated with a job group.
+ *
+ * The returned list may contain running, failed, and completed jobs, and may vary across
+ * invocations of this method. This method does not guarantee the order of the elements in
+ * its result.
+ */
+ def getJobIdsForGroup(jobGroup: String): Array[Int] = sc.statusTracker.getJobIdsForGroup(jobGroup)
+
+ /**
+ * Returns an array containing the ids of all active stages.
+ *
+ * This method does not guarantee the order of the elements in its result.
+ */
+ def getActiveStageIds(): Array[Int] = sc.statusTracker.getActiveStageIds()
+
+ /**
+ * Returns an array containing the ids of all active jobs.
+ *
+ * This method does not guarantee the order of the elements in its result.
+ */
+ def getActiveJobIds(): Array[Int] = sc.statusTracker.getActiveJobIds()
+
+ /**
+ * Returns job information, or `null` if the job info could not be found or was garbage collected.
+ */
+ def getJobInfo(jobId: Int): SparkJobInfo = sc.statusTracker.getJobInfo(jobId).orNull
+
+ /**
+ * Returns stage information, or `null` if the stage info could not be found or was
+ * garbage collected.
+ */
+ def getStageInfo(stageId: Int): SparkStageInfo = sc.statusTracker.getStageInfo(stageId).orNull
+}
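A sketch of polling the tracker from a second thread while a slow job runs. It assumes a local master and a `statusTracker` accessor on `JavaSparkContext` that returns this class (that accessor is not part of this file):
{{{
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext

val jsc = new JavaSparkContext(new SparkConf().setAppName("statusSketch").setMaster("local[2]"))
val tracker = jsc.statusTracker                          // assumed accessor on JavaSparkContext
val poller = new Thread(new Runnable {
  override def run(): Unit = {
    Thread.sleep(500)
    for (jobId <- tracker.getActiveJobIds()) {
      val info = tracker.getJobInfo(jobId)               // may be null (weak consistency)
      if (info != null) println(s"job $jobId runs stages ${info.stageIds().mkString(",")}")
    }
  }
})
poller.setDaemon(true)
poller.start()
jsc.sc.parallelize(1 to 1000, 10).map { i => Thread.sleep(5); i }.count()
jsc.stop()
}}}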
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala
index 22810cb1c662d..b52d0a5028e84 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala
@@ -19,10 +19,20 @@ package org.apache.spark.api.java
import com.google.common.base.Optional
+import scala.collection.convert.Wrappers.MapWrapper
+
private[spark] object JavaUtils {
def optionToOptional[T](option: Option[T]): Optional[T] =
option match {
case Some(value) => Optional.of(value)
case None => Optional.absent()
}
+
+ // Workaround for SPARK-3926 / SI-8911
+ def mapAsSerializableJavaMap[A, B](underlying: collection.Map[A, B]) =
+ new SerializableMapWrapper(underlying)
+
+ class SerializableMapWrapper[A, B](underlying: collection.Map[A, B])
+ extends MapWrapper(underlying) with java.io.Serializable
+
}
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
index adaa1ef6cf9ff..5ba66178e2b78 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
@@ -17,8 +17,10 @@
package org.apache.spark.api.python
+import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
-import org.apache.spark.Logging
+import org.apache.spark.util.Utils
+import org.apache.spark.{Logging, SerializableWritable, SparkException}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io._
import scala.util.{Failure, Success, Try}
@@ -31,16 +33,17 @@ import org.apache.spark.annotation.Experimental
* transformation code by overriding the convert method.
*/
@Experimental
-trait Converter[T, U] extends Serializable {
+trait Converter[T, + U] extends Serializable {
def convert(obj: T): U
}
private[python] object Converter extends Logging {
- def getInstance(converterClass: Option[String]): Converter[Any, Any] = {
+ def getInstance(converterClass: Option[String],
+ defaultConverter: Converter[Any, Any]): Converter[Any, Any] = {
converterClass.map { cc =>
Try {
- val c = Class.forName(cc).newInstance().asInstanceOf[Converter[Any, Any]]
+ val c = Utils.classForName(cc).newInstance().asInstanceOf[Converter[Any, Any]]
logInfo(s"Loaded converter: $cc")
c
} match {
@@ -49,7 +52,7 @@ private[python] object Converter extends Logging {
logError(s"Failed to load converter: $cc")
throw err
}
- }.getOrElse { new DefaultConverter }
+ }.getOrElse { defaultConverter }
}
}
@@ -57,7 +60,8 @@ private[python] object Converter extends Logging {
* A converter that handles conversion of common [[org.apache.hadoop.io.Writable]] objects.
* Other objects are passed through without conversion.
*/
-private[python] class DefaultConverter extends Converter[Any, Any] {
+private[python] class WritableToJavaConverter(
+ conf: Broadcast[SerializableWritable[Configuration]]) extends Converter[Any, Any] {
/**
* Converts a [[org.apache.hadoop.io.Writable]] to the underlying primitive, String or
@@ -72,17 +76,29 @@ private[python] class DefaultConverter extends Converter[Any, Any] {
case fw: FloatWritable => fw.get()
case t: Text => t.toString
case bw: BooleanWritable => bw.get()
- case byw: BytesWritable => byw.getBytes
+ case byw: BytesWritable =>
+ val bytes = new Array[Byte](byw.getLength)
+ System.arraycopy(byw.getBytes(), 0, bytes, 0, byw.getLength)
+ bytes
case n: NullWritable => null
- case aw: ArrayWritable => aw.get().map(convertWritable(_))
- case mw: MapWritable => mapAsJavaMap(mw.map { case (k, v) =>
- (convertWritable(k), convertWritable(v))
- }.toMap)
+ case aw: ArrayWritable =>
+ // Due to erasure, all arrays appear as Object[] and they get pickled to Python tuples.
+ // Since we can't determine element types for empty arrays, we will not attempt to
+ // convert to primitive arrays (which get pickled to Python arrays). Users may want to
+ // write custom converters for arrays if they know the element types a priori.
+ aw.get().map(convertWritable(_))
+ case mw: MapWritable =>
+ val map = new java.util.HashMap[Any, Any]()
+ mw.foreach { case (k, v) =>
+ map.put(convertWritable(k), convertWritable(v))
+ }
+ map
+ case w: Writable => WritableUtils.clone(w, conf.value.value)
case other => other
}
}
- def convert(obj: Any): Any = {
+ override def convert(obj: Any): Any = {
obj match {
case writable: Writable =>
convertWritable(writable)
@@ -92,6 +108,47 @@ private[python] class DefaultConverter extends Converter[Any, Any] {
}
}
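The defensive copy in the `BytesWritable` case above exists because `getBytes()` exposes the backing buffer, which is typically longer than the logical length and may be reused by the record reader. A small sketch of the behavior it guards against:
{{{
import org.apache.hadoop.io.BytesWritable

val bw = new BytesWritable()
bw.set(Array[Byte](1, 2, 3), 0, 3)
println(bw.getBytes.length)   // capacity of the backing buffer, often larger than 3
println(bw.getLength)         // 3, the logical length the copy above respects
}}}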
+/**
+ * A converter that converts common types to [[org.apache.hadoop.io.Writable]]. Note that array
+ * types are not supported since the user needs to subclass [[org.apache.hadoop.io.ArrayWritable]]
+ * to set the type properly. See [[org.apache.spark.api.python.DoubleArrayWritable]] and
+ * [[org.apache.spark.api.python.DoubleArrayToWritableConverter]] for an example. They are used in
+ * PySpark RDD `saveAsNewAPIHadoopFile` doctest.
+ */
+private[python] class JavaToWritableConverter extends Converter[Any, Writable] {
+
+ /**
+ * Converts common data types to [[org.apache.hadoop.io.Writable]]. Note that array types are not
+ * supported out-of-the-box.
+ */
+ private def convertToWritable(obj: Any): Writable = {
+ import collection.JavaConversions._
+ obj match {
+ case i: java.lang.Integer => new IntWritable(i)
+ case d: java.lang.Double => new DoubleWritable(d)
+ case l: java.lang.Long => new LongWritable(l)
+ case f: java.lang.Float => new FloatWritable(f)
+ case s: java.lang.String => new Text(s)
+ case b: java.lang.Boolean => new BooleanWritable(b)
+ case aob: Array[Byte] => new BytesWritable(aob)
+ case null => NullWritable.get()
+ case map: java.util.Map[_, _] =>
+ val mapWritable = new MapWritable()
+ map.foreach { case (k, v) =>
+ mapWritable.put(convertToWritable(k), convertToWritable(v))
+ }
+ mapWritable
+ case other => throw new SparkException(
+ s"Data of type ${other.getClass.getName} cannot be used")
+ }
+ }
+
+ override def convert(obj: Any): Writable = obj match {
+ case writable: Writable => writable
+ case other => convertToWritable(other)
+ }
+}
+
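A sketch of the converter's behavior; since it is `private[python]`, the sketch assumes it runs from within the `org.apache.spark.api.python` package (types it does not recognize raise a `SparkException`):
{{{
package org.apache.spark.api.python

import org.apache.hadoop.io.MapWritable

object JavaToWritableConverterSketch {
  def main(args: Array[String]): Unit = {
    val converter = new JavaToWritableConverter
    println(converter.convert("spark"))       // Text("spark")
    println(converter.convert(Int.box(42)))   // IntWritable(42)
    val m = new java.util.HashMap[Any, Any]()
    m.put("answer", Int.box(42))
    println(converter.convert(m).isInstanceOf[MapWritable])   // true: nested values converted too
  }
}
}}}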
/** Utilities for working with Python objects <-> Hadoop-related objects */
private[python] object PythonHadoopUtil {
@@ -118,7 +175,7 @@ private[python] object PythonHadoopUtil {
/**
* Converts an RDD of key-value pairs, where key and/or value could be instances of
- * [[org.apache.hadoop.io.Writable]], into an RDD[(K, V)]
+ * [[org.apache.hadoop.io.Writable]], into an RDD of base types, or vice versa.
*/
def convertRDD[K, V](rdd: RDD[(K, V)],
keyConverter: Converter[Any, Any],
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 462e09466bfa6..45beb8fc8c925 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -19,26 +19,29 @@ package org.apache.spark.api.python
import java.io._
import java.net._
-import java.nio.charset.Charset
import java.util.{List => JList, ArrayList => JArrayList, Map => JMap, Collections}
+import org.apache.spark.input.PortableDataStream
+
import scala.collection.JavaConversions._
-import scala.reflect.ClassTag
-import scala.util.Try
+import scala.collection.mutable
+import scala.language.existentials
-import net.razorvine.pickle.{Pickler, Unpickler}
+import com.google.common.base.Charsets.UTF_8
import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.mapred.{InputFormat, JobConf}
-import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
+import org.apache.hadoop.io.compress.CompressionCodec
+import org.apache.hadoop.mapred.{InputFormat, OutputFormat, JobConf}
+import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, OutputFormat => NewOutputFormat}
import org.apache.spark._
+import org.apache.spark.SparkContext._
import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.util.Utils
-private[spark] class PythonRDD[T: ClassTag](
- parent: RDD[T],
+private[spark] class PythonRDD(
+ @transient parent: RDD[_],
command: Array[Byte],
envVars: JMap[String, String],
pythonIncludes: JList[String],
@@ -49,27 +52,39 @@ private[spark] class PythonRDD[T: ClassTag](
extends RDD[Array[Byte]](parent) {
val bufferSize = conf.getInt("spark.buffer.size", 65536)
+ val reuse_worker = conf.getBoolean("spark.python.worker.reuse", true)
- override def getPartitions = parent.partitions
+ override def getPartitions = firstParent.partitions
- override val partitioner = if (preservePartitoning) parent.partitioner else None
+ override val partitioner = if (preservePartitoning) firstParent.partitioner else None
override def compute(split: Partition, context: TaskContext): Iterator[Array[Byte]] = {
val startTime = System.currentTimeMillis
val env = SparkEnv.get
+ val localdir = env.blockManager.diskBlockManager.localDirs.map(
+ f => f.getPath()).mkString(",")
+ envVars += ("SPARK_LOCAL_DIRS" -> localdir) // it's also used in monitor thread
+ if (reuse_worker) {
+ envVars += ("SPARK_REUSE_WORKER" -> "1")
+ }
val worker: Socket = env.createPythonWorker(pythonExec, envVars.toMap)
// Start a thread to feed the process input from our parent's iterator
val writerThread = new WriterThread(env, worker, split, context)
- context.addOnCompleteCallback { () =>
+ var complete_cleanly = false
+ context.addTaskCompletionListener { context =>
writerThread.shutdownOnTaskCompletion()
-
- // Cleanup the worker socket. This will also cause the Python worker to exit.
- try {
- worker.close()
- } catch {
- case e: Exception => logWarning("Failed to close worker socket", e)
+ writerThread.join()
+ if (reuse_worker && complete_cleanly) {
+ env.releasePythonWorker(pythonExec, envVars.toMap, worker)
+ } else {
+ try {
+ worker.close()
+ } catch {
+ case e: Exception =>
+ logWarning("Failed to close worker socket", e)
+ }
}
}
@@ -109,13 +124,17 @@ private[spark] class PythonRDD[T: ClassTag](
val total = finishTime - startTime
logInfo("Times: total = %s, boot = %s, init = %s, finish = %s".format(total, boot,
init, finish))
+ val memoryBytesSpilled = stream.readLong()
+ val diskBytesSpilled = stream.readLong()
+ context.taskMetrics.memoryBytesSpilled += memoryBytesSpilled
+ context.taskMetrics.diskBytesSpilled += diskBytesSpilled
read()
case SpecialLengths.PYTHON_EXCEPTION_THROWN =>
// Signals that an exception has been thrown in python
val exLength = stream.readInt()
val obj = new Array[Byte](exLength)
stream.readFully(obj)
- throw new PythonException(new String(obj, "utf-8"),
+ throw new PythonException(new String(obj, UTF_8),
writerThread.exception.getOrElse(null))
case SpecialLengths.END_OF_DATA_SECTION =>
// We've finished the data section of the output, but we can still
@@ -127,14 +146,21 @@ private[spark] class PythonRDD[T: ClassTag](
stream.readFully(update)
accumulator += Collections.singletonList(update)
}
+ if (stream.readInt() == SpecialLengths.END_OF_STREAM) {
+ complete_cleanly = true
+ }
null
}
} catch {
- case e: Exception if context.interrupted =>
+ case e: Exception if context.isInterrupted =>
logDebug("Exception thrown after task interruption", e)
throw new TaskKilledException
+ case e: Exception if env.isStopped =>
+ logDebug("Exception thrown after context is stopped", e)
+ null // exit silently
+
case e: Exception if writerThread.exception.isDefined =>
logError("Python worker exited unexpectedly (crashed)", e)
logError("This may have been caused by a prior exception:", writerThread.exception.get)
@@ -170,13 +196,12 @@ private[spark] class PythonRDD[T: ClassTag](
/** Terminates the writer thread, ignoring any exceptions that may occur due to cleanup. */
def shutdownOnTaskCompletion() {
- assert(context.completed)
+ assert(context.isCompleted)
this.interrupt()
}
override def run(): Unit = Utils.logUncaughtExceptions {
try {
- SparkEnv.set(env)
val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize)
val dataOut = new DataOutputStream(stream)
// Partition index
@@ -189,29 +214,51 @@ private[spark] class PythonRDD[T: ClassTag](
PythonRDD.writeUTF(include, dataOut)
}
// Broadcast variables
- dataOut.writeInt(broadcastVars.length)
+ val oldBids = PythonRDD.getWorkerBroadcasts(worker)
+ val newBids = broadcastVars.map(_.id).toSet
+ // number of different broadcasts
+ val cnt = oldBids.diff(newBids).size + newBids.diff(oldBids).size
+ dataOut.writeInt(cnt)
+ for (bid <- oldBids) {
+ if (!newBids.contains(bid)) {
+ // remove the broadcast from worker
+ dataOut.writeLong(- bid - 1) // bid >= 0
+ oldBids.remove(bid)
+ }
+ }
for (broadcast <- broadcastVars) {
- dataOut.writeLong(broadcast.id)
- dataOut.writeInt(broadcast.value.length)
- dataOut.write(broadcast.value)
+ if (!oldBids.contains(broadcast.id)) {
+ // send new broadcast
+ dataOut.writeLong(broadcast.id)
+ dataOut.writeInt(broadcast.value.length)
+ dataOut.write(broadcast.value)
+ oldBids.add(broadcast.id)
+ }
}
dataOut.flush()
// Serialized command:
dataOut.writeInt(command.length)
dataOut.write(command)
// Data values
- PythonRDD.writeIteratorToStream(parent.iterator(split, context), dataOut)
+ PythonRDD.writeIteratorToStream(firstParent.iterator(split, context), dataOut)
+ dataOut.writeInt(SpecialLengths.END_OF_DATA_SECTION)
+ dataOut.writeInt(SpecialLengths.END_OF_STREAM)
dataOut.flush()
} catch {
- case e: Exception if context.completed || context.interrupted =>
+ case e: Exception if context.isCompleted || context.isInterrupted =>
logDebug("Exception thrown after task completion (likely due to cleanup)", e)
+ worker.shutdownOutput()
case e: Exception =>
// We must avoid throwing exceptions here, because the thread uncaught exception handler
// will kill the whole executor (see org.apache.spark.executor.Executor).
_exception = e
+ worker.shutdownOutput()
} finally {
- Try(worker.shutdownOutput()) // kill Python worker process
+ // Release memory used by this thread for shuffles
+ env.shuffleMemoryManager.releaseMemoryForThisThread()
+ // Release memory used by this thread for unrolling blocks
+ env.blockManager.memoryStore.releaseUnrollMemoryForThisThread()
}
}
}
@@ -229,13 +276,13 @@ private[spark] class PythonRDD[T: ClassTag](
override def run() {
// Kill the worker if it is interrupted, checking until task completion.
// TODO: This has a race condition if interruption occurs, as completed may still become true.
- while (!context.interrupted && !context.completed) {
+ while (!context.isInterrupted && !context.isCompleted) {
Thread.sleep(2000)
}
- if (!context.completed) {
+ if (!context.isCompleted) {
try {
logWarning("Incomplete task interrupted: Attempting to kill Python Worker")
- env.destroyPythonWorker(pythonExec, envVars.toMap)
+ env.destroyPythonWorker(pythonExec, envVars.toMap, worker)
} catch {
case e: Exception =>
logError("Exception when trying to kill worker", e)
@@ -267,10 +314,18 @@ private object SpecialLengths {
val END_OF_DATA_SECTION = -1
val PYTHON_EXCEPTION_THROWN = -2
val TIMING_DATA = -3
+ val END_OF_STREAM = -4
}
private[spark] object PythonRDD extends Logging {
- val UTF8 = Charset.forName("UTF-8")
+
+ // remember the broadcasts sent to each worker
+ private val workerBroadcasts = new mutable.WeakHashMap[Socket, mutable.Set[Long]]()
+ private def getWorkerBroadcasts(worker: Socket) = {
+ synchronized {
+ workerBroadcasts.getOrElseUpdate(worker, new mutable.HashSet[Long]())
+ }
+ }
/**
* Adapter for calling SparkContext#runJob from Python.
@@ -295,18 +350,34 @@ private[spark] object PythonRDD extends Logging {
def readRDDFromFile(sc: JavaSparkContext, filename: String, parallelism: Int):
JavaRDD[Array[Byte]] = {
val file = new DataInputStream(new FileInputStream(filename))
- val objs = new collection.mutable.ArrayBuffer[Array[Byte]]
try {
- while (true) {
- val length = file.readInt()
- val obj = new Array[Byte](length)
- file.readFully(obj)
- objs.append(obj)
+ val objs = new collection.mutable.ArrayBuffer[Array[Byte]]
+ try {
+ while (true) {
+ val length = file.readInt()
+ val obj = new Array[Byte](length)
+ file.readFully(obj)
+ objs.append(obj)
+ }
+ } catch {
+ case eof: EOFException => {}
}
- } catch {
- case eof: EOFException => {}
+ JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism))
+ } finally {
+ file.close()
+ }
+ }
+
+ def readBroadcastFromFile(sc: JavaSparkContext, filename: String): Broadcast[Array[Byte]] = {
+ val file = new DataInputStream(new FileInputStream(filename))
+ try {
+ val length = file.readInt()
+ val obj = new Array[Byte](length)
+ file.readFully(obj)
+ sc.broadcast(obj)
+ } finally {
+ file.close()
}
- JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism))
}
def writeIteratorToStream[T](iter: Iterator[T], dataOut: DataOutputStream) {
@@ -326,22 +397,33 @@ private[spark] object PythonRDD extends Logging {
newIter.asInstanceOf[Iterator[String]].foreach { str =>
writeUTF(str, dataOut)
}
- case pair: Tuple2[_, _] =>
- pair._1 match {
- case bytePair: Array[Byte] =>
- newIter.asInstanceOf[Iterator[Tuple2[Array[Byte], Array[Byte]]]].foreach { pair =>
- dataOut.writeInt(pair._1.length)
- dataOut.write(pair._1)
- dataOut.writeInt(pair._2.length)
- dataOut.write(pair._2)
- }
- case stringPair: String =>
- newIter.asInstanceOf[Iterator[Tuple2[String, String]]].foreach { pair =>
- writeUTF(pair._1, dataOut)
- writeUTF(pair._2, dataOut)
- }
- case other =>
- throw new SparkException("Unexpected Tuple2 element type " + pair._1.getClass)
+ case stream: PortableDataStream =>
+ newIter.asInstanceOf[Iterator[PortableDataStream]].foreach { stream =>
+ val bytes = stream.toArray()
+ dataOut.writeInt(bytes.length)
+ dataOut.write(bytes)
+ }
+ case (key: String, stream: PortableDataStream) =>
+ newIter.asInstanceOf[Iterator[(String, PortableDataStream)]].foreach {
+ case (key, stream) =>
+ writeUTF(key, dataOut)
+ val bytes = stream.toArray()
+ dataOut.writeInt(bytes.length)
+ dataOut.write(bytes)
+ }
+ case (key: String, value: String) =>
+ newIter.asInstanceOf[Iterator[(String, String)]].foreach {
+ case (key, value) =>
+ writeUTF(key, dataOut)
+ writeUTF(value, dataOut)
+ }
+ case (key: Array[Byte], value: Array[Byte]) =>
+ newIter.asInstanceOf[Iterator[(Array[Byte], Array[Byte])]].foreach {
+ case (key, value) =>
+ dataOut.writeInt(key.length)
+ dataOut.write(key)
+ dataOut.writeInt(value.length)
+ dataOut.write(value)
}
case other =>
throw new SparkException("Unexpected element type " + first.getClass)
@@ -362,19 +444,17 @@ private[spark] object PythonRDD extends Logging {
valueClassMaybeNull: String,
keyConverterClass: String,
valueConverterClass: String,
- minSplits: Int) = {
+ minSplits: Int,
+ batchSize: Int) = {
val keyClass = Option(keyClassMaybeNull).getOrElse("org.apache.hadoop.io.Text")
val valueClass = Option(valueClassMaybeNull).getOrElse("org.apache.hadoop.io.Text")
- implicit val kcm = ClassTag(Class.forName(keyClass)).asInstanceOf[ClassTag[K]]
- implicit val vcm = ClassTag(Class.forName(valueClass)).asInstanceOf[ClassTag[V]]
- val kc = kcm.runtimeClass.asInstanceOf[Class[K]]
- val vc = vcm.runtimeClass.asInstanceOf[Class[V]]
-
+ val kc = Utils.classForName(keyClass).asInstanceOf[Class[K]]
+ val vc = Utils.classForName(valueClass).asInstanceOf[Class[V]]
val rdd = sc.sc.sequenceFile[K, V](path, kc, vc, minSplits)
- val keyConverter = Converter.getInstance(Option(keyConverterClass))
- val valueConverter = Converter.getInstance(Option(valueConverterClass))
- val converted = PythonHadoopUtil.convertRDD[K, V](rdd, keyConverter, valueConverter)
- JavaRDD.fromRDD(SerDeUtil.rddToPython(converted))
+ val confBroadcasted = sc.sc.broadcast(new SerializableWritable(sc.hadoopConfiguration()))
+ val converted = convertRDD(rdd, keyConverterClass, valueConverterClass,
+ new WritableToJavaConverter(confBroadcasted))
+ JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize))
}
/**
@@ -391,17 +471,16 @@ private[spark] object PythonRDD extends Logging {
valueClass: String,
keyConverterClass: String,
valueConverterClass: String,
- confAsMap: java.util.HashMap[String, String]) = {
- val conf = PythonHadoopUtil.mapToConf(confAsMap)
- val baseConf = sc.hadoopConfiguration()
- val mergedConf = PythonHadoopUtil.mergeConfs(baseConf, conf)
+ confAsMap: java.util.HashMap[String, String],
+ batchSize: Int) = {
+ val mergedConf = getMergedConf(confAsMap, sc.hadoopConfiguration())
val rdd =
newAPIHadoopRDDFromClassNames[K, V, F](sc,
Some(path), inputFormatClass, keyClass, valueClass, mergedConf)
- val keyConverter = Converter.getInstance(Option(keyConverterClass))
- val valueConverter = Converter.getInstance(Option(valueConverterClass))
- val converted = PythonHadoopUtil.convertRDD[K, V](rdd, keyConverter, valueConverter)
- JavaRDD.fromRDD(SerDeUtil.rddToPython(converted))
+ val confBroadcasted = sc.sc.broadcast(new SerializableWritable(mergedConf))
+ val converted = convertRDD(rdd, keyConverterClass, valueConverterClass,
+ new WritableToJavaConverter(confBroadcasted))
+ JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize))
}
/**
@@ -418,15 +497,16 @@ private[spark] object PythonRDD extends Logging {
valueClass: String,
keyConverterClass: String,
valueConverterClass: String,
- confAsMap: java.util.HashMap[String, String]) = {
+ confAsMap: java.util.HashMap[String, String],
+ batchSize: Int) = {
val conf = PythonHadoopUtil.mapToConf(confAsMap)
val rdd =
newAPIHadoopRDDFromClassNames[K, V, F](sc,
None, inputFormatClass, keyClass, valueClass, conf)
- val keyConverter = Converter.getInstance(Option(keyConverterClass))
- val valueConverter = Converter.getInstance(Option(valueConverterClass))
- val converted = PythonHadoopUtil.convertRDD[K, V](rdd, keyConverter, valueConverter)
- JavaRDD.fromRDD(SerDeUtil.rddToPython(converted))
+ val confBroadcasted = sc.sc.broadcast(new SerializableWritable(conf))
+ val converted = convertRDD(rdd, keyConverterClass, valueConverterClass,
+ new WritableToJavaConverter(confBroadcasted))
+ JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize))
}
private def newAPIHadoopRDDFromClassNames[K, V, F <: NewInputFormat[K, V]](
@@ -436,18 +516,14 @@ private[spark] object PythonRDD extends Logging {
keyClass: String,
valueClass: String,
conf: Configuration) = {
- implicit val kcm = ClassTag(Class.forName(keyClass)).asInstanceOf[ClassTag[K]]
- implicit val vcm = ClassTag(Class.forName(valueClass)).asInstanceOf[ClassTag[V]]
- implicit val fcm = ClassTag(Class.forName(inputFormatClass)).asInstanceOf[ClassTag[F]]
- val kc = kcm.runtimeClass.asInstanceOf[Class[K]]
- val vc = vcm.runtimeClass.asInstanceOf[Class[V]]
- val fc = fcm.runtimeClass.asInstanceOf[Class[F]]
- val rdd = if (path.isDefined) {
+ val kc = Utils.classForName(keyClass).asInstanceOf[Class[K]]
+ val vc = Utils.classForName(valueClass).asInstanceOf[Class[V]]
+ val fc = Utils.classForName(inputFormatClass).asInstanceOf[Class[F]]
+ if (path.isDefined) {
sc.sc.newAPIHadoopFile[K, V, F](path.get, fc, kc, vc, conf)
} else {
sc.sc.newAPIHadoopRDD[K, V, F](conf, fc, kc, vc)
}
- rdd
}
/**
@@ -464,17 +540,16 @@ private[spark] object PythonRDD extends Logging {
valueClass: String,
keyConverterClass: String,
valueConverterClass: String,
- confAsMap: java.util.HashMap[String, String]) = {
- val conf = PythonHadoopUtil.mapToConf(confAsMap)
- val baseConf = sc.hadoopConfiguration()
- val mergedConf = PythonHadoopUtil.mergeConfs(baseConf, conf)
+ confAsMap: java.util.HashMap[String, String],
+ batchSize: Int) = {
+ val mergedConf = getMergedConf(confAsMap, sc.hadoopConfiguration())
val rdd =
hadoopRDDFromClassNames[K, V, F](sc,
Some(path), inputFormatClass, keyClass, valueClass, mergedConf)
- val keyConverter = Converter.getInstance(Option(keyConverterClass))
- val valueConverter = Converter.getInstance(Option(valueConverterClass))
- val converted = PythonHadoopUtil.convertRDD[K, V](rdd, keyConverter, valueConverter)
- JavaRDD.fromRDD(SerDeUtil.rddToPython(converted))
+ val confBroadcasted = sc.sc.broadcast(new SerializableWritable(mergedConf))
+ val converted = convertRDD(rdd, keyConverterClass, valueConverterClass,
+ new WritableToJavaConverter(confBroadcasted))
+ JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize))
}
/**
@@ -491,15 +566,16 @@ private[spark] object PythonRDD extends Logging {
valueClass: String,
keyConverterClass: String,
valueConverterClass: String,
- confAsMap: java.util.HashMap[String, String]) = {
+ confAsMap: java.util.HashMap[String, String],
+ batchSize: Int) = {
val conf = PythonHadoopUtil.mapToConf(confAsMap)
val rdd =
hadoopRDDFromClassNames[K, V, F](sc,
None, inputFormatClass, keyClass, valueClass, conf)
- val keyConverter = Converter.getInstance(Option(keyConverterClass))
- val valueConverter = Converter.getInstance(Option(valueConverterClass))
- val converted = PythonHadoopUtil.convertRDD[K, V](rdd, keyConverter, valueConverter)
- JavaRDD.fromRDD(SerDeUtil.rddToPython(converted))
+ val confBroadcasted = sc.sc.broadcast(new SerializableWritable(conf))
+ val converted = convertRDD(rdd, keyConverterClass, valueConverterClass,
+ new WritableToJavaConverter(confBroadcasted))
+ JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize))
}
private def hadoopRDDFromClassNames[K, V, F <: InputFormat[K, V]](
@@ -509,22 +585,18 @@ private[spark] object PythonRDD extends Logging {
keyClass: String,
valueClass: String,
conf: Configuration) = {
- implicit val kcm = ClassTag(Class.forName(keyClass)).asInstanceOf[ClassTag[K]]
- implicit val vcm = ClassTag(Class.forName(valueClass)).asInstanceOf[ClassTag[V]]
- implicit val fcm = ClassTag(Class.forName(inputFormatClass)).asInstanceOf[ClassTag[F]]
- val kc = kcm.runtimeClass.asInstanceOf[Class[K]]
- val vc = vcm.runtimeClass.asInstanceOf[Class[V]]
- val fc = fcm.runtimeClass.asInstanceOf[Class[F]]
- val rdd = if (path.isDefined) {
+ val kc = Utils.classForName(keyClass).asInstanceOf[Class[K]]
+ val vc = Utils.classForName(valueClass).asInstanceOf[Class[V]]
+ val fc = Utils.classForName(inputFormatClass).asInstanceOf[Class[F]]
+ if (path.isDefined) {
sc.sc.hadoopFile(path.get, fc, kc, vc)
} else {
sc.sc.hadoopRDD(new JobConf(conf), fc, kc, vc)
}
- rdd
}
def writeUTF(str: String, dataOut: DataOutputStream) {
- val bytes = str.getBytes(UTF8)
+ val bytes = str.getBytes(UTF_8)
dataOut.writeInt(bytes.length)
dataOut.write(bytes)
}
@@ -540,41 +612,156 @@ private[spark] object PythonRDD extends Logging {
file.close()
}
+ private def getMergedConf(confAsMap: java.util.HashMap[String, String],
+ baseConf: Configuration): Configuration = {
+ val conf = PythonHadoopUtil.mapToConf(confAsMap)
+ PythonHadoopUtil.mergeConfs(baseConf, conf)
+ }
+
+ private def inferKeyValueTypes[K, V](rdd: RDD[(K, V)], keyConverterClass: String = null,
+ valueConverterClass: String = null): (Class[_], Class[_]) = {
+ // Peek at an element to figure out key/value types. Since Writables are not serializable,
+ // we cannot call first() on the converted RDD. Instead, we call first() on the original RDD
+ // and then convert locally.
+ val (key, value) = rdd.first()
+ val (kc, vc) = getKeyValueConverters(keyConverterClass, valueConverterClass,
+ new JavaToWritableConverter)
+ (kc.convert(key).getClass, vc.convert(value).getClass)
+ }
+
+ private def getKeyValueTypes(keyClass: String, valueClass: String):
+ Option[(Class[_], Class[_])] = {
+ for {
+ k <- Option(keyClass)
+ v <- Option(valueClass)
+ } yield (Utils.classForName(k), Utils.classForName(v))
+ }
+
+ private def getKeyValueConverters(keyConverterClass: String, valueConverterClass: String,
+ defaultConverter: Converter[Any, Any]): (Converter[Any, Any], Converter[Any, Any]) = {
+ val keyConverter = Converter.getInstance(Option(keyConverterClass), defaultConverter)
+ val valueConverter = Converter.getInstance(Option(valueConverterClass), defaultConverter)
+ (keyConverter, valueConverter)
+ }
+
/**
- * Convert an RDD of serialized Python dictionaries to Scala Maps
- * TODO: Support more Python types.
+ * Convert an RDD of key-value pairs from internal types to serializable types suitable for
+ * output, or vice versa.
*/
- def pythonToJavaMap(pyRDD: JavaRDD[Array[Byte]]): JavaRDD[Map[String, _]] = {
- pyRDD.rdd.mapPartitions { iter =>
- val unpickle = new Unpickler
- // TODO: Figure out why flatMap is necessay for pyspark
- iter.flatMap { row =>
- unpickle.loads(row) match {
- case objs: java.util.ArrayList[JMap[String, _] @unchecked] => objs.map(_.toMap)
- // Incase the partition doesn't have a collection
- case obj: JMap[String @unchecked, _] => Seq(obj.toMap)
- }
- }
- }
+ private def convertRDD[K, V](rdd: RDD[(K, V)],
+ keyConverterClass: String,
+ valueConverterClass: String,
+ defaultConverter: Converter[Any, Any]): RDD[(Any, Any)] = {
+ val (kc, vc) = getKeyValueConverters(keyConverterClass, valueConverterClass,
+ defaultConverter)
+ PythonHadoopUtil.convertRDD(rdd, kc, vc)
}
/**
- * Convert and RDD of Java objects to and RDD of serialized Python objects, that is usable by
- * PySpark.
+ * Output a Python RDD of key-value pairs as a Hadoop SequenceFile using the Writable types
+ * we convert from the RDD's key and value types. Note that keys and values can't be
+ * [[org.apache.hadoop.io.Writable]] types already, since Writables are not Java
+ * `Serializable` and we can't peek at them. The `path` can be on any Hadoop file system.
*/
- def javaToPython(jRDD: JavaRDD[Any]): JavaRDD[Array[Byte]] = {
- jRDD.rdd.mapPartitions { iter =>
- val pickle = new Pickler
- iter.map { row =>
- pickle.dumps(row)
- }
+ def saveAsSequenceFile[K, V, C <: CompressionCodec](
+ pyRDD: JavaRDD[Array[Byte]],
+ batchSerialized: Boolean,
+ path: String,
+ compressionCodecClass: String) = {
+ saveAsHadoopFile(
+ pyRDD, batchSerialized, path, "org.apache.hadoop.mapred.SequenceFileOutputFormat",
+ null, null, null, null, new java.util.HashMap(), compressionCodecClass)
+ }
+
+ /**
+ * Output a Python RDD of key-value pairs to any Hadoop file system, using old Hadoop
+ * `OutputFormat` in mapred package. Keys and values are converted to suitable output
+ * types using either user specified converters or, if not specified,
+ * [[org.apache.spark.api.python.JavaToWritableConverter]]. Post-conversion types
+ * `keyClass` and `valueClass` are automatically inferred if not specified. The passed-in
+ * `confAsMap` is merged with the default Hadoop conf associated with the SparkContext of
+ * this RDD.
+ */
+ def saveAsHadoopFile[K, V, F <: OutputFormat[_, _], C <: CompressionCodec](
+ pyRDD: JavaRDD[Array[Byte]],
+ batchSerialized: Boolean,
+ path: String,
+ outputFormatClass: String,
+ keyClass: String,
+ valueClass: String,
+ keyConverterClass: String,
+ valueConverterClass: String,
+ confAsMap: java.util.HashMap[String, String],
+ compressionCodecClass: String) = {
+ val rdd = SerDeUtil.pythonToPairRDD(pyRDD, batchSerialized)
+ val (kc, vc) = getKeyValueTypes(keyClass, valueClass).getOrElse(
+ inferKeyValueTypes(rdd, keyConverterClass, valueConverterClass))
+ val mergedConf = getMergedConf(confAsMap, pyRDD.context.hadoopConfiguration)
+ val codec = Option(compressionCodecClass).map(Utils.classForName(_).asInstanceOf[Class[C]])
+ val converted = convertRDD(rdd, keyConverterClass, valueConverterClass,
+ new JavaToWritableConverter)
+ val fc = Utils.classForName(outputFormatClass).asInstanceOf[Class[F]]
+ converted.saveAsHadoopFile(path, kc, vc, fc, new JobConf(mergedConf), codec=codec)
+ }
+
+ /**
+ * Output a Python RDD of key-value pairs to any Hadoop file system, using new Hadoop
+ * `OutputFormat` in mapreduce package. Keys and values are converted to suitable output
+ * types using either user specified converters or, if not specified,
+ * [[org.apache.spark.api.python.JavaToWritableConverter]]. Post-conversion types
+ * `keyClass` and `valueClass` are automatically inferred if not specified. The passed-in
+ * `confAsMap` is merged with the default Hadoop conf associated with the SparkContext of
+ * this RDD.
+ */
+ def saveAsNewAPIHadoopFile[K, V, F <: NewOutputFormat[_, _]](
+ pyRDD: JavaRDD[Array[Byte]],
+ batchSerialized: Boolean,
+ path: String,
+ outputFormatClass: String,
+ keyClass: String,
+ valueClass: String,
+ keyConverterClass: String,
+ valueConverterClass: String,
+ confAsMap: java.util.HashMap[String, String]) = {
+ val rdd = SerDeUtil.pythonToPairRDD(pyRDD, batchSerialized)
+ val (kc, vc) = getKeyValueTypes(keyClass, valueClass).getOrElse(
+ inferKeyValueTypes(rdd, keyConverterClass, valueConverterClass))
+ val mergedConf = getMergedConf(confAsMap, pyRDD.context.hadoopConfiguration)
+ val converted = convertRDD(rdd, keyConverterClass, valueConverterClass,
+ new JavaToWritableConverter)
+ val fc = Utils.classForName(outputFormatClass).asInstanceOf[Class[F]]
+ converted.saveAsNewAPIHadoopFile(path, kc, vc, fc, mergedConf)
+ }
+
+ /**
+ * Output a Python RDD of key-value pairs to any Hadoop file system, using a Hadoop conf
+ * converted from the passed-in `confAsMap`. The conf should set relevant output params (
+ * e.g., output path, output format, etc), in the same way as it would be configured for
+ * a Hadoop MapReduce job. Both old and new Hadoop OutputFormat APIs are supported
+ * (mapred vs. mapreduce). Keys/values are converted for output using either user specified
+ * converters or, by default, [[org.apache.spark.api.python.JavaToWritableConverter]].
+ */
+ def saveAsHadoopDataset[K, V](
+ pyRDD: JavaRDD[Array[Byte]],
+ batchSerialized: Boolean,
+ confAsMap: java.util.HashMap[String, String],
+ keyConverterClass: String,
+ valueConverterClass: String,
+ useNewAPI: Boolean) = {
+ val conf = PythonHadoopUtil.mapToConf(confAsMap)
+ val converted = convertRDD(SerDeUtil.pythonToPairRDD(pyRDD, batchSerialized),
+ keyConverterClass, valueConverterClass, new JavaToWritableConverter)
+ if (useNewAPI) {
+ converted.saveAsNewAPIHadoopDataset(conf)
+ } else {
+ converted.saveAsHadoopDataset(new JobConf(conf))
}
}
}
private
class BytesToString extends org.apache.spark.api.java.function.Function[Array[Byte], String] {
- override def call(arr: Array[Byte]) : String = new String(arr, PythonRDD.UTF8)
+ override def call(arr: Array[Byte]) : String = new String(arr, UTF_8)
}
/**
@@ -588,19 +775,30 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort:
val bufferSize = SparkEnv.get.conf.getInt("spark.buffer.size", 65536)
+ /**
+ * We try to reuse a single Socket to transfer accumulator updates, as they are all added
+ * by the DAGScheduler's single-threaded actor anyway.
+ */
+ @transient var socket: Socket = _
+
+ def openSocket(): Socket = synchronized {
+ if (socket == null || socket.isClosed) {
+ socket = new Socket(serverHost, serverPort)
+ }
+ socket
+ }
+
override def zero(value: JList[Array[Byte]]): JList[Array[Byte]] = new JArrayList
override def addInPlace(val1: JList[Array[Byte]], val2: JList[Array[Byte]])
- : JList[Array[Byte]] = {
+ : JList[Array[Byte]] = synchronized {
if (serverHost == null) {
// This happens on the worker node, where we just want to remember all the updates
val1.addAll(val2)
val1
} else {
// This happens on the master, where we pass the updates to Python through a socket
- val socket = new Socket(serverHost, serverPort)
- // SPARK-2282: Immediately reuse closed sockets because we create one per task.
- socket.setReuseAddress(true)
+ val socket = openSocket()
val in = socket.getInputStream
val out = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream, bufferSize))
out.writeInt(val2.size)
@@ -614,7 +812,6 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort:
if (byteRead == -1) {
throw new SparkException("EOF reached before Python server acknowledged")
}
- socket.close()
null
}
}
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index 6d3e257c4d5df..be5ebfa9219d3 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -29,7 +29,7 @@ private[spark] object PythonUtils {
val pythonPath = new ArrayBuffer[String]
for (sparkHome <- sys.env.get("SPARK_HOME")) {
pythonPath += Seq(sparkHome, "python").mkString(File.separator)
- pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.1-src.zip").mkString(File.separator)
+ pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.2.1-src.zip").mkString(File.separator)
}
pythonPath ++= SparkContext.jarOfObject(this)
pythonPath.mkString(File.pathSeparator)
@@ -40,28 +40,3 @@ private[spark] object PythonUtils {
paths.filter(_ != "").mkString(File.pathSeparator)
}
}
-
-
-/**
- * A utility class to redirect the child process's stdout or stderr.
- */
-private[spark] class RedirectThread(
- in: InputStream,
- out: OutputStream,
- name: String)
- extends Thread(name) {
-
- setDaemon(true)
- override def run() {
- scala.util.control.Exception.ignoring(classOf[IOException]) {
- // FIXME: We copy the stream on the level of bytes to avoid encoding problems.
- val buf = new Array[Byte](1024)
- var len = in.read(buf)
- while (len != -1) {
- out.write(buf, 0, len)
- out.flush()
- len = in.read(buf)
- }
- }
- }
-}
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index 759cbe2c46c52..e314408c067e9 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -17,13 +17,14 @@
package org.apache.spark.api.python
-import java.io.{DataInputStream, InputStream, OutputStreamWriter}
+import java.io.{DataOutputStream, DataInputStream, InputStream, OutputStreamWriter}
import java.net.{InetAddress, ServerSocket, Socket, SocketException}
+import scala.collection.mutable
import scala.collection.JavaConversions._
import org.apache.spark._
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{RedirectThread, Utils}
private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String])
extends Logging {
@@ -39,6 +40,12 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
var daemon: Process = null
val daemonHost = InetAddress.getByAddress(Array(127, 0, 0, 1))
var daemonPort: Int = 0
+ val daemonWorkers = new mutable.WeakHashMap[Socket, Int]()
+ val idleWorkers = new mutable.Queue[Socket]()
+ var lastActivity = 0L
+ new MonitorThread().start()
+
+ var simpleWorkers = new mutable.WeakHashMap[Socket, Process]()
val pythonPath = PythonUtils.mergePythonPaths(
PythonUtils.sparkPythonPath,
@@ -47,6 +54,11 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
def create(): Socket = {
if (useDaemon) {
+ synchronized {
+ if (idleWorkers.size > 0) {
+ return idleWorkers.dequeue()
+ }
+ }
createThroughDaemon()
} else {
createSimpleWorker()
@@ -58,19 +70,31 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
* to avoid the high cost of forking from Java. This currently only works on UNIX-based systems.
*/
private def createThroughDaemon(): Socket = {
+
+ def createSocket(): Socket = {
+ val socket = new Socket(daemonHost, daemonPort)
+ val pid = new DataInputStream(socket.getInputStream).readInt()
+ if (pid < 0) {
+ throw new IllegalStateException("Python daemon failed to launch worker with code " + pid)
+ }
+ daemonWorkers.put(socket, pid)
+ socket
+ }
+
synchronized {
// Start the daemon if it hasn't been started
startDaemon()
// Attempt to connect, restart and retry once if it fails
try {
- new Socket(daemonHost, daemonPort)
+ createSocket()
} catch {
case exc: SocketException =>
- logWarning("Python daemon unexpectedly quit, attempting to restart")
+ logWarning("Failed to open socket to Python daemon:", exc)
+ logWarning("Assuming that daemon unexpectedly quit, attempting to restart")
stopDaemon()
startDaemon()
- new Socket(daemonHost, daemonPort)
+ createSocket()
}
}
}
@@ -84,10 +108,12 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
serverSocket = new ServerSocket(0, 1, InetAddress.getByAddress(Array(127, 0, 0, 1)))
// Create and start the worker
- val pb = new ProcessBuilder(Seq(pythonExec, "-u", "-m", "pyspark.worker"))
+ val pb = new ProcessBuilder(Seq(pythonExec, "-m", "pyspark.worker"))
val workerEnv = pb.environment()
workerEnv.putAll(envVars)
workerEnv.put("PYTHONPATH", pythonPath)
+ // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
+ workerEnv.put("PYTHONUNBUFFERED", "YES")
val worker = pb.start()
// Redirect worker stdout and stderr
@@ -101,7 +127,9 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
// Wait for it to connect to our socket
serverSocket.setSoTimeout(10000)
try {
- return serverSocket.accept()
+ val socket = serverSocket.accept()
+ simpleWorkers.put(socket, worker)
+ return socket
} catch {
case e: Exception =>
throw new SparkException("Python worker did not connect back in time", e)
@@ -123,10 +151,12 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
try {
// Create and start the daemon
- val pb = new ProcessBuilder(Seq(pythonExec, "-u", "-m", "pyspark.daemon"))
+ val pb = new ProcessBuilder(Seq(pythonExec, "-m", "pyspark.daemon"))
val workerEnv = pb.environment()
workerEnv.putAll(envVars)
workerEnv.put("PYTHONPATH", pythonPath)
+ // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
+ workerEnv.put("PYTHONUNBUFFERED", "YES")
daemon = pb.start()
val in = new DataInputStream(daemon.getInputStream)
@@ -181,23 +211,99 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
}
}
+ /**
+ * Monitor the idle workers and kill them once there has been no activity for longer than IDLE_WORKER_TIMEOUT_MS.
+ */
+ private class MonitorThread extends Thread(s"Idle Worker Monitor for $pythonExec") {
+
+ setDaemon(true)
+
+ override def run() {
+ while (true) {
+ synchronized {
+ if (lastActivity + IDLE_WORKER_TIMEOUT_MS < System.currentTimeMillis()) {
+ cleanupIdleWorkers()
+ lastActivity = System.currentTimeMillis()
+ }
+ }
+ Thread.sleep(10000)
+ }
+ }
+ }
+
+ private def cleanupIdleWorkers() {
+ while (idleWorkers.length > 0) {
+ val worker = idleWorkers.dequeue()
+ try {
+ // the worker will exit after closing the socket
+ worker.close()
+ } catch {
+ case e: Exception =>
+ logWarning("Failed to close worker socket", e)
+ }
+ }
+ }
+
private def stopDaemon() {
synchronized {
- // Request shutdown of existing daemon by sending SIGTERM
- if (daemon != null) {
- daemon.destroy()
+ if (useDaemon) {
+ cleanupIdleWorkers()
+
+ // Request shutdown of existing daemon by sending SIGTERM
+ if (daemon != null) {
+ daemon.destroy()
+ }
+
+ daemon = null
+ daemonPort = 0
+ } else {
+ // Use foreach rather than the lazy mapValues so every worker process is actually destroyed
+ simpleWorkers.values.foreach(_.destroy())
}
-
- daemon = null
- daemonPort = 0
}
}
def stop() {
stopDaemon()
}
+
+ def stopWorker(worker: Socket) {
+ synchronized {
+ if (useDaemon) {
+ if (daemon != null) {
+ daemonWorkers.get(worker).foreach { pid =>
+ // tell daemon to kill worker by pid
+ val output = new DataOutputStream(daemon.getOutputStream)
+ output.writeInt(pid)
+ output.flush()
+ daemon.getOutputStream.flush()
+ }
+ }
+ } else {
+ simpleWorkers.get(worker).foreach(_.destroy())
+ }
+ }
+ worker.close()
+ }
+
+ def releaseWorker(worker: Socket) {
+ if (useDaemon) {
+ synchronized {
+ lastActivity = System.currentTimeMillis()
+ idleWorkers.enqueue(worker)
+ }
+ } else {
+ // Clean up the worker socket. This will also cause the Python worker to exit.
+ try {
+ worker.close()
+ } catch {
+ case e: Exception =>
+ logWarning("Failed to close worker socket", e)
+ }
+ }
+ }
}
private object PythonWorkerFactory {
val PROCESS_WAIT_TIMEOUT_MS = 10000
+ val IDLE_WORKER_TIMEOUT_MS = 60000 // kill idle workers after 1 minute
}
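
For context, a minimal sketch of the worker-reuse cycle these changes introduce, assuming a private[spark] caller (such as PythonRDD) and a `python` executable on the PATH; everything outside the factory methods above is illustrative:

{{{
import java.net.Socket

// create() hands back an idle worker if one is queued; releaseWorker() returns it to
// the idle queue, where MonitorThread reaps it after IDLE_WORKER_TIMEOUT_MS.
val factory = new PythonWorkerFactory("python", Map("PYTHONUNBUFFERED" -> "YES"))
val worker: Socket = factory.create()
try {
  // ... exchange serialized task data with the Python worker over `worker` ...
} finally {
  factory.releaseWorker(worker)   // reused by the next create(), or killed after one idle minute
}
}}}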
diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala
index 9a012e7254901..a4153aaa926f8 100644
--- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala
@@ -17,16 +17,149 @@
package org.apache.spark.api.python
-import scala.util.Try
-import org.apache.spark.rdd.RDD
-import org.apache.spark.Logging
-import scala.util.Success
+import java.nio.ByteOrder
+import java.util.{ArrayList => JArrayList}
+
+import org.apache.spark.api.java.JavaRDD
+
+import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
+import scala.collection.mutable
import scala.util.Failure
-import net.razorvine.pickle.Pickler
+import scala.util.Try
+
+import net.razorvine.pickle.{Unpickler, Pickler}
+import org.apache.spark.{Logging, SparkException}
+import org.apache.spark.rdd.RDD
/** Utilities for serialization / deserialization between Python and Java, using Pickle. */
-private[python] object SerDeUtil extends Logging {
+private[spark] object SerDeUtil extends Logging {
+ // Unpickle array.array generated by Python 2.6
+ class ArrayConstructor extends net.razorvine.pickle.objects.ArrayConstructor {
+ // /* Description of types */
+ // static struct arraydescr descriptors[] = {
+ // {'c', sizeof(char), c_getitem, c_setitem},
+ // {'b', sizeof(char), b_getitem, b_setitem},
+ // {'B', sizeof(char), BB_getitem, BB_setitem},
+ // #ifdef Py_USING_UNICODE
+ // {'u', sizeof(Py_UNICODE), u_getitem, u_setitem},
+ // #endif
+ // {'h', sizeof(short), h_getitem, h_setitem},
+ // {'H', sizeof(short), HH_getitem, HH_setitem},
+ // {'i', sizeof(int), i_getitem, i_setitem},
+ // {'I', sizeof(int), II_getitem, II_setitem},
+ // {'l', sizeof(long), l_getitem, l_setitem},
+ // {'L', sizeof(long), LL_getitem, LL_setitem},
+ // {'f', sizeof(float), f_getitem, f_setitem},
+ // {'d', sizeof(double), d_getitem, d_setitem},
+ // {'\0', 0, 0, 0} /* Sentinel */
+ // };
+ // TODO: support Py_UNICODE with 2 bytes
+ // FIXME: unpickling an array of floats is wrong in Pyrolite, so we reverse the
+ // machine code for float/double here to work around it.
+ // We should remove this once Pyrolite fixes the issue.
+ val machineCodes: Map[Char, Int] = if (ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+ Map('c' -> 1, 'B' -> 0, 'b' -> 1, 'H' -> 3, 'h' -> 5, 'I' -> 7, 'i' -> 9,
+ 'L' -> 11, 'l' -> 13, 'f' -> 14, 'd' -> 16, 'u' -> 21
+ )
+ } else {
+ Map('c' -> 1, 'B' -> 0, 'b' -> 1, 'H' -> 2, 'h' -> 4, 'I' -> 6, 'i' -> 8,
+ 'L' -> 10, 'l' -> 12, 'f' -> 15, 'd' -> 17, 'u' -> 20
+ )
+ }
+ override def construct(args: Array[Object]): Object = {
+ if (args.length == 1) {
+ construct(args ++ Array(""))
+ } else if (args.length == 2 && args(1).isInstanceOf[String]) {
+ val typecode = args(0).asInstanceOf[String].charAt(0)
+ val data: Array[Byte] = args(1).asInstanceOf[String].getBytes("ISO-8859-1")
+ construct(typecode, machineCodes(typecode), data)
+ } else {
+ super.construct(args)
+ }
+ }
+ }
+
+ private var initialized = false
+ // This should be called before trying to unpickle array.array from Python.
+ // In cluster mode, this should be called inside the closure so that it also runs on executors.
+ def initialize() = {
+ synchronized {
+ if (!initialized) {
+ Unpickler.registerConstructor("array", "array", new ArrayConstructor())
+ initialized = true
+ }
+ }
+ }
+ initialize()
+
+
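
A hedged sketch of why initialize() belongs inside closures in cluster mode (the Unpickler registration is per-JVM, so it must also run on executors); `pickledRdd` is an assumed RDD of pickled records:

{{{
import net.razorvine.pickle.Unpickler
import org.apache.spark.rdd.RDD

def unpickleAll(pickledRdd: RDD[Array[Byte]]): RDD[Any] =
  pickledRdd.mapPartitions { iter =>
    SerDeUtil.initialize()           // registers ArrayConstructor in each executor JVM
    val unpickle = new Unpickler
    iter.map(bytes => unpickle.loads(bytes))
  }
}}}

This mirrors what pythonToJava below does inside mapPartitions.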
+ /**
+ * Convert an RDD of Java objects to an RDD of arrays (no recursive conversions).
+ * It is only used by pyspark.sql.
+ */
+ def toJavaArray(jrdd: JavaRDD[Any]): JavaRDD[Array[_]] = {
+ jrdd.rdd.map {
+ case objs: JArrayList[_] =>
+ objs.toArray
+ case obj if obj.getClass.isArray =>
+ obj.asInstanceOf[Array[_]].toArray
+ }.toJavaRDD()
+ }
+
+ /**
+ * Choose the batch size based on the size of the pickled objects (kept roughly between 1 MB and 10 MB).
+ */
+ private[spark] class AutoBatchedPickler(iter: Iterator[Any]) extends Iterator[Array[Byte]] {
+ private val pickle = new Pickler()
+ private var batch = 1
+ private val buffer = new mutable.ArrayBuffer[Any]
+
+ override def hasNext: Boolean = iter.hasNext
+
+ override def next(): Array[Byte] = {
+ while (iter.hasNext && buffer.length < batch) {
+ buffer += iter.next()
+ }
+ val bytes = pickle.dumps(buffer.toArray)
+ val size = bytes.length
+ // keep the pickled batch size roughly between 1 MB and 10 MB
+ if (size < 1024 * 1024) {
+ batch *= 2
+ } else if (size > 1024 * 1024 * 10 && batch > 1) {
+ batch /= 2
+ }
+ buffer.clear()
+ bytes
+ }
+ }
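
The batch size adapts geometrically: it doubles while a pickled batch stays under 1 MB and halves when one exceeds 10 MB. A hedged usage sketch (the record contents are illustrative; the class is private[spark]):

{{{
// Pickle a partition's worth of records into size-bounded chunks of bytes.
val records: Iterator[Any] = Iterator.tabulate(100000)(i => "record-" + i)
val pickledChunks: Iterator[Array[Byte]] = new SerDeUtil.AutoBatchedPickler(records)
pickledChunks.foreach(chunk => println(s"chunk of ${chunk.length} bytes"))
}}}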
+
+ /**
+ * Convert an RDD of Java objects to an RDD of serialized Python objects, which is usable by
+ * PySpark.
+ */
+ private[spark] def javaToPython(jRDD: JavaRDD[_]): JavaRDD[Array[Byte]] = {
+ jRDD.rdd.mapPartitions { iter => new AutoBatchedPickler(iter) }
+ }
+
+ /**
+ * Convert an RDD of serialized Python objects to an RDD of Java objects that is usable by PySpark.
+ */
+ def pythonToJava(pyRDD: JavaRDD[Array[Byte]], batched: Boolean): JavaRDD[Any] = {
+ pyRDD.rdd.mapPartitions { iter =>
+ initialize()
+ val unpickle = new Unpickler
+ iter.flatMap { row =>
+ val obj = unpickle.loads(row)
+ if (batched) {
+ obj.asInstanceOf[JArrayList[_]].asScala
+ } else {
+ Seq(obj)
+ }
+ }
+ }.toJavaRDD()
+ }
private def checkPickle(t: (Any, Any)): (Boolean, Boolean) = {
val pickle = new Pickler
@@ -65,23 +198,43 @@ private[python] object SerDeUtil extends Logging {
* by PySpark. By default, if serialization fails, toString is called and the string
* representation is serialized
*/
- def rddToPython(rdd: RDD[(Any, Any)]): RDD[Array[Byte]] = {
+ def pairRDDToPython(rdd: RDD[(Any, Any)], batchSize: Int): RDD[Array[Byte]] = {
val (keyFailed, valueFailed) = checkPickle(rdd.first())
+
rdd.mapPartitions { iter =>
- val pickle = new Pickler
- iter.map { case (k, v) =>
- if (keyFailed && valueFailed) {
- pickle.dumps(Array(k.toString, v.toString))
- } else if (keyFailed) {
- pickle.dumps(Array(k.toString, v))
- } else if (!keyFailed && valueFailed) {
- pickle.dumps(Array(k, v.toString))
- } else {
- pickle.dumps(Array(k, v))
- }
+ val cleaned = iter.map { case (k, v) =>
+ val key = if (keyFailed) k.toString else k
+ val value = if (valueFailed) v.toString else v
+ Array[Any](key, value)
+ }
+ if (batchSize == 0) {
+ new AutoBatchedPickler(cleaned)
+ } else {
+ val pickle = new Pickler
+ cleaned.grouped(batchSize).map(batched => pickle.dumps(seqAsJavaList(batched)))
}
}
}
-}
+ /**
+ * Convert an RDD of serialized Python tuples (K, V) to RDD[(K, V)].
+ */
+ def pythonToPairRDD[K, V](pyRDD: RDD[Array[Byte]], batched: Boolean): RDD[(K, V)] = {
+ def isPair(obj: Any): Boolean = {
+ Option(obj.getClass.getComponentType).exists(!_.isPrimitive) &&
+ obj.asInstanceOf[Array[_]].length == 2
+ }
+ val rdd = pythonToJava(pyRDD, batched).rdd
+ rdd.first match {
+ case obj if isPair(obj) =>
+ // we only accept (K, V)
+ case other => throw new SparkException(
+ s"RDD element of type ${other.getClass.getName} cannot be used")
+ }
+ rdd.map { obj =>
+ val arr = obj.asInstanceOf[Array[_]]
+ (arr.head.asInstanceOf[K], arr.last.asInstanceOf[V])
+ }
+ }
+}
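
A hedged round-trip sketch of the two pair helpers above, assuming a local SparkContext and a fixed batch size (batchSize = 0 would route through AutoBatchedPickler instead):

{{{
import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("serde-roundtrip"))
val pairs = sc.parallelize(Seq[(Any, Any)]((1, "a"), (2, "b"), (3, "c")))

val pickled = SerDeUtil.pairRDDToPython(pairs, batchSize = 10)                  // RDD[Array[Byte]]
val restored = SerDeUtil.pythonToPairRDD[Int, String](pickled, batched = true)  // RDD[(Int, String)]
restored.collect()   // expected: Array((1,a), (2,b), (3,c))

sc.stop()
}}}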
diff --git a/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala
index f0e3fb9aff5a0..c0cbd28a845be 100644
--- a/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala
@@ -17,15 +17,17 @@
package org.apache.spark.api.python
-import org.apache.spark.SparkContext
-import org.apache.hadoop.io._
-import scala.Array
import java.io.{DataOutput, DataInput}
+
+import com.google.common.base.Charsets.UTF_8
+
+import org.apache.hadoop.io._
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
import org.apache.spark.api.java.JavaSparkContext
+import org.apache.spark.{SparkContext, SparkException}
/**
- * A class to test MsgPack serialization on the Scala side, that will be deserialized
+ * A class to test Pyrolite serialization on the Scala side; it will be deserialized
* in Python
* @param str
* @param int
@@ -54,7 +56,13 @@ case class TestWritable(var str: String, var int: Int, var double: Double) exten
}
}
-class TestConverter extends Converter[Any, Any] {
+private[python] class TestInputKeyConverter extends Converter[Any, Any] {
+ override def convert(obj: Any) = {
+ obj.asInstanceOf[IntWritable].get().toChar
+ }
+}
+
+private[python] class TestInputValueConverter extends Converter[Any, Any] {
import collection.JavaConversions._
override def convert(obj: Any) = {
val m = obj.asInstanceOf[MapWritable]
@@ -62,6 +70,38 @@ class TestConverter extends Converter[Any, Any] {
}
}
+private[python] class TestOutputKeyConverter extends Converter[Any, Any] {
+ override def convert(obj: Any) = {
+ new Text(obj.asInstanceOf[Int].toString)
+ }
+}
+
+private[python] class TestOutputValueConverter extends Converter[Any, Any] {
+ import collection.JavaConversions._
+ override def convert(obj: Any) = {
+ new DoubleWritable(obj.asInstanceOf[java.util.Map[Double, _]].keySet().head)
+ }
+}
+
+private[python] class DoubleArrayWritable extends ArrayWritable(classOf[DoubleWritable])
+
+private[python] class DoubleArrayToWritableConverter extends Converter[Any, Writable] {
+ override def convert(obj: Any) = obj match {
+ case arr if arr.getClass.isArray && arr.getClass.getComponentType == classOf[Double] =>
+ val daw = new DoubleArrayWritable
+ daw.set(arr.asInstanceOf[Array[Double]].map(new DoubleWritable(_)))
+ daw
+ case other => throw new SparkException(s"Data of type $other is not supported")
+ }
+}
+
+private[python] class WritableToDoubleArrayConverter extends Converter[Any, Array[Double]] {
+ override def convert(obj: Any): Array[Double] = obj match {
+ case daw : DoubleArrayWritable => daw.get().map(_.asInstanceOf[DoubleWritable].get())
+ case other => throw new SparkException(s"Data of type $other is not supported")
+ }
+}
+
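
A hedged sketch of how the two array converters above round-trip an Array[Double] through the Writable layer (they are private[python], so this assumes code in the same package); this is the conversion the SequenceFile test data below exercises:

{{{
val toWritable = new DoubleArrayToWritableConverter
val fromWritable = new WritableToDoubleArrayConverter

val writable = toWritable.convert(Array(1.0, 2.0, 3.0))   // a DoubleArrayWritable
val doubles = fromWritable.convert(writable)              // back to Array(1.0, 2.0, 3.0)
}}}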
/**
* This object contains method to generate SequenceFile test data and write it to a
* given directory (probably a temp directory)
@@ -97,7 +137,8 @@ object WriteInputFormatTestDataGenerator {
sc.parallelize(intKeys).saveAsSequenceFile(intPath)
sc.parallelize(intKeys.map{ case (k, v) => (k.toDouble, v) }).saveAsSequenceFile(doublePath)
sc.parallelize(intKeys.map{ case (k, v) => (k.toString, v) }).saveAsSequenceFile(textPath)
- sc.parallelize(intKeys.map{ case (k, v) => (k, v.getBytes) }).saveAsSequenceFile(bytesPath)
+ sc.parallelize(intKeys.map{ case (k, v) => (k, v.getBytes(UTF_8)) }
+ ).saveAsSequenceFile(bytesPath)
val bools = Seq((1, true), (2, true), (2, false), (3, true), (2, false), (1, false))
sc.parallelize(bools).saveAsSequenceFile(boolPath)
sc.parallelize(intKeys).map{ case (k, v) =>
@@ -106,19 +147,20 @@ object WriteInputFormatTestDataGenerator {
// Create test data for ArrayWritable
val data = Seq(
- (1, Array(1.0, 2.0, 3.0)),
+ (1, Array()),
(2, Array(3.0, 4.0, 5.0)),
(3, Array(4.0, 5.0, 6.0))
)
sc.parallelize(data, numSlices = 2)
.map{ case (k, v) =>
- (new IntWritable(k), new ArrayWritable(classOf[DoubleWritable], v.map(new DoubleWritable(_))))
- }.saveAsNewAPIHadoopFile[SequenceFileOutputFormat[IntWritable, ArrayWritable]](arrPath)
+ val va = new DoubleArrayWritable
+ va.set(v.map(new DoubleWritable(_)))
+ (new IntWritable(k), va)
+ }.saveAsNewAPIHadoopFile[SequenceFileOutputFormat[IntWritable, DoubleArrayWritable]](arrPath)
// Create test data for MapWritable, with keys DoubleWritable and values Text
val mapData = Seq(
- (1, Map(2.0 -> "aa")),
- (2, Map(3.0 -> "bb")),
+ (1, Map()),
(2, Map(1.0 -> "cc")),
(3, Map(2.0 -> "dd")),
(2, Map(1.0 -> "aa")),
@@ -126,19 +168,19 @@ object WriteInputFormatTestDataGenerator {
)
sc.parallelize(mapData, numSlices = 2).map{ case (i, m) =>
val mw = new MapWritable()
- val k = m.keys.head
- val v = m.values.head
- mw.put(new DoubleWritable(k), new Text(v))
+ m.foreach { case (k, v) =>
+ mw.put(new DoubleWritable(k), new Text(v))
+ }
(new IntWritable(i), mw)
}.saveAsSequenceFile(mapPath)
// Create test data for arbitrary custom writable TestWritable
val testClass = Seq(
- ("1", TestWritable("test1", 123, 54.0)),
- ("2", TestWritable("test2", 456, 8762.3)),
- ("1", TestWritable("test3", 123, 423.1)),
- ("3", TestWritable("test56", 456, 423.5)),
- ("2", TestWritable("test2", 123, 5435.2))
+ ("1", TestWritable("test1", 1, 1.0)),
+ ("2", TestWritable("test2", 2, 2.3)),
+ ("3", TestWritable("test3", 3, 3.1)),
+ ("5", TestWritable("test56", 5, 5.5)),
+ ("4", TestWritable("test4", 4, 4.2))
)
val rdd = sc.parallelize(testClass, numSlices = 2).map{ case (k, v) => (new Text(k), v) }
rdd.saveAsNewAPIHadoopFile(classPath,
diff --git a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
index 76956f6a345d1..a5ea478f231d7 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
@@ -20,6 +20,8 @@ package org.apache.spark.broadcast
import java.io.Serializable
import org.apache.spark.SparkException
+import org.apache.spark.Logging
+import org.apache.spark.util.Utils
import scala.reflect.ClassTag
@@ -37,7 +39,7 @@ import scala.reflect.ClassTag
*
* {{{
* scala> val broadcastVar = sc.broadcast(Array(1, 2, 3))
- * broadcastVar: spark.Broadcast[Array[Int]] = spark.Broadcast(b5c40191-a864-4c7d-b9bf-d87e1a4e787c)
+ * broadcastVar: org.apache.spark.broadcast.Broadcast[Array[Int]] = Broadcast(0)
*
* scala> broadcastVar.value
* res0: Array[Int] = Array(1, 2, 3)
@@ -52,7 +54,7 @@ import scala.reflect.ClassTag
* @param id A unique identifier for the broadcast variable.
* @tparam T Type of the data contained in the broadcast variable.
*/
-abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable {
+abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Logging {
/**
* Flag signifying whether the broadcast variable is valid
@@ -60,6 +62,8 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable {
*/
@volatile private var _isValid = true
+ private var _destroySite = ""
+
/** Get the broadcasted value. */
def value: T = {
assertValid()
@@ -84,13 +88,26 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable {
doUnpersist(blocking)
}
+
+ /**
+ * Destroy all data and metadata related to this broadcast variable. Use this with caution;
+ * once a broadcast variable has been destroyed, it cannot be used again.
+ * This method blocks until destroy has completed.
+ */
+ def destroy() {
+ destroy(blocking = true)
+ }
+
/**
* Destroy all data and metadata related to this broadcast variable. Use this with caution;
* once a broadcast variable has been destroyed, it cannot be used again.
+ * @param blocking Whether to block until destroy has completed
*/
private[spark] def destroy(blocking: Boolean) {
assertValid()
_isValid = false
+ _destroySite = Utils.getCallSite().shortForm
+ logInfo("Destroying %s (from %s)".format(toString, _destroySite))
doDestroy(blocking)
}
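
A hedged usage sketch of the new public destroy(), assuming an active SparkContext named `sc`:

{{{
val words = sc.broadcast(Array("a", "b", "c"))
println(words.value.mkString(","))   // a,b,c
words.destroy()                      // blocks until all data and metadata are removed
// A later words.value would throw a SparkException whose message names the destroy call site.
}}}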
@@ -106,25 +123,26 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable {
* Actually get the broadcasted value. Concrete implementations of Broadcast class must
* define their own way to get the value.
*/
- private[spark] def getValue(): T
+ protected def getValue(): T
/**
* Actually unpersist the broadcasted value on the executors. Concrete implementations of
* Broadcast class must define their own logic to unpersist their own data.
*/
- private[spark] def doUnpersist(blocking: Boolean)
+ protected def doUnpersist(blocking: Boolean)
/**
* Actually destroy all data and metadata related to this broadcast variable.
* Implementation of Broadcast class must define their own logic to destroy their own
* state.
*/
- private[spark] def doDestroy(blocking: Boolean)
+ protected def doDestroy(blocking: Boolean)
/** Check if this broadcast is valid. If not valid, exception is thrown. */
- private[spark] def assertValid() {
+ protected def assertValid() {
if (!_isValid) {
- throw new SparkException("Attempted to use %s after it has been destroyed!".format(toString))
+ throw new SparkException(
+ "Attempted to use %s after it was destroyed (%s) ".format(toString, _destroySite))
}
}
diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
index a8c827030a1ef..6a187b40628a2 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
@@ -32,8 +32,19 @@ import org.apache.spark.annotation.DeveloperApi
*/
@DeveloperApi
trait BroadcastFactory {
+
def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager): Unit
+
+ /**
+ * Creates a new broadcast variable.
+ *
+ * @param value value to broadcast
+ * @param isLocal whether we are in local mode (single JVM process)
+ * @param id unique id representing this broadcast variable
+ */
def newBroadcast[T: ClassTag](value: T, isLocal: Boolean, id: Long): Broadcast[T]
+
def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean): Unit
+
def stop(): Unit
}
diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala
index c88be6aba6901..8f8a0b11f9f2e 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala
@@ -39,7 +39,7 @@ private[spark] class BroadcastManager(
synchronized {
if (!initialized) {
val broadcastFactoryClass =
- conf.get("spark.broadcast.factory", "org.apache.spark.broadcast.HttpBroadcastFactory")
+ conf.get("spark.broadcast.factory", "org.apache.spark.broadcast.TorrentBroadcastFactory")
broadcastFactory =
Class.forName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory]
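
Since TorrentBroadcastFactory is now the default, the previous HTTP-based behavior must be requested explicitly. A hedged configuration sketch:

{{{
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.broadcast.factory", "org.apache.spark.broadcast.HttpBroadcastFactory")
}}}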
diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
index 4f6cabaff2b99..31f0a462f84d8 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
@@ -40,9 +40,9 @@ private[spark] class HttpBroadcast[T: ClassTag](
@transient var value_ : T, isLocal: Boolean, id: Long)
extends Broadcast[T](id) with Logging with Serializable {
- def getValue = value_
+ override protected def getValue() = value_
- val blockId = BroadcastBlockId(id)
+ private val blockId = BroadcastBlockId(id)
/*
* Broadcasted data is also stored in the BlockManager of the driver. The BlockManagerMaster
@@ -60,25 +60,25 @@ private[spark] class HttpBroadcast[T: ClassTag](
/**
* Remove all persisted state associated with this HTTP broadcast on the executors.
*/
- def doUnpersist(blocking: Boolean) {
+ override protected def doUnpersist(blocking: Boolean) {
HttpBroadcast.unpersist(id, removeFromDriver = false, blocking)
}
/**
* Remove all persisted state associated with this HTTP broadcast on the executors and driver.
*/
- def doDestroy(blocking: Boolean) {
+ override protected def doDestroy(blocking: Boolean) {
HttpBroadcast.unpersist(id, removeFromDriver = true, blocking)
}
/** Used by the JVM when serializing this object. */
- private def writeObject(out: ObjectOutputStream) {
+ private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
assertValid()
out.defaultWriteObject()
}
/** Used by the JVM when deserializing this object. */
- private def readObject(in: ObjectInputStream) {
+ private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
in.defaultReadObject()
HttpBroadcast.synchronized {
SparkEnv.get.blockManager.getSingle(blockId) match {
@@ -102,7 +102,7 @@ private[spark] class HttpBroadcast[T: ClassTag](
}
}
-private[spark] object HttpBroadcast extends Logging {
+private[broadcast] object HttpBroadcast extends Logging {
private var initialized = false
private var broadcastDir: File = null
private var compress: Boolean = false
@@ -152,7 +152,8 @@ private[spark] object HttpBroadcast extends Logging {
private def createServer(conf: SparkConf) {
broadcastDir = Utils.createTempDir(Utils.getLocalDir(conf))
- server = new HttpServer(broadcastDir, securityManager)
+ val broadcastPort = conf.getInt("spark.broadcast.port", 0)
+ server = new HttpServer(broadcastDir, securityManager, broadcastPort, "HTTP broadcast server")
server.start()
serverUri = server.uri
logInfo("Broadcast server started at " + serverUri)
@@ -160,23 +161,28 @@ private[spark] object HttpBroadcast extends Logging {
def getFile(id: Long) = new File(broadcastDir, BroadcastBlockId(id).name)
- def write(id: Long, value: Any) {
+ private def write(id: Long, value: Any) {
val file = getFile(id)
- val out: OutputStream = {
- if (compress) {
- compressionCodec.compressedOutputStream(new FileOutputStream(file))
- } else {
- new BufferedOutputStream(new FileOutputStream(file), bufferSize)
+ val fileOutputStream = new FileOutputStream(file)
+ try {
+ val out: OutputStream = {
+ if (compress) {
+ compressionCodec.compressedOutputStream(fileOutputStream)
+ } else {
+ new BufferedOutputStream(fileOutputStream, bufferSize)
+ }
}
+ val ser = SparkEnv.get.serializer.newInstance()
+ val serOut = ser.serializeStream(out)
+ serOut.writeObject(value)
+ serOut.close()
+ files += file
+ } finally {
+ fileOutputStream.close()
}
- val ser = SparkEnv.get.serializer.newInstance()
- val serOut = ser.serializeStream(out)
- serOut.writeObject(value)
- serOut.close()
- files += file
}
- def read[T: ClassTag](id: Long): T = {
+ private def read[T: ClassTag](id: Long): T = {
logDebug("broadcast read server: " + serverUri + " id: broadcast-" + id)
val url = serverUri + "/" + BroadcastBlockId(id).name
@@ -185,10 +191,12 @@ private[spark] object HttpBroadcast extends Logging {
logDebug("broadcast security enabled")
val newuri = Utils.constructURIForAuthentication(new URI(url), securityManager)
uc = newuri.toURL.openConnection()
+ uc.setConnectTimeout(httpReadTimeout)
uc.setAllowUserInteraction(false)
} else {
logDebug("broadcast not using security")
uc = new URL(url).openConnection()
+ uc.setConnectTimeout(httpReadTimeout)
}
val in = {
diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcastFactory.scala
index d5a031e2bbb59..c7ef02d572a19 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcastFactory.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcastFactory.scala
@@ -27,21 +27,21 @@ import org.apache.spark.{SecurityManager, SparkConf}
* [[org.apache.spark.broadcast.HttpBroadcast]] for more details about this mechanism.
*/
class HttpBroadcastFactory extends BroadcastFactory {
- def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) {
+ override def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) {
HttpBroadcast.initialize(isDriver, conf, securityMgr)
}
- def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean, id: Long) =
+ override def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean, id: Long) =
new HttpBroadcast[T](value_, isLocal, id)
- def stop() { HttpBroadcast.stop() }
+ override def stop() { HttpBroadcast.stop() }
/**
* Remove all persisted state associated with the HTTP broadcast with the given ID.
* @param removeFromDriver Whether to remove state from the driver
* @param blocking Whether to block until unbroadcasted
*/
- def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean) {
+ override def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean) {
HttpBroadcast.unpersist(id, removeFromDriver, blocking)
}
}
diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
index 734de37ba115d..94142d33369c7 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
@@ -17,60 +17,133 @@
package org.apache.spark.broadcast
-import java.io.{ByteArrayInputStream, ObjectInputStream, ObjectOutputStream}
+import java.io._
+import java.nio.ByteBuffer
+import scala.collection.JavaConversions.asJavaEnumeration
import scala.reflect.ClassTag
-import scala.math
import scala.util.Random
import org.apache.spark.{Logging, SparkConf, SparkEnv, SparkException}
+import org.apache.spark.io.CompressionCodec
+import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BroadcastBlockId, StorageLevel}
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{ByteBufferInputStream, Utils}
+import org.apache.spark.util.io.ByteArrayChunkOutputStream
/**
- * A [[org.apache.spark.broadcast.Broadcast]] implementation that uses a BitTorrent-like
- * protocol to do a distributed transfer of the broadcasted data to the executors.
- * The mechanism is as follows. The driver divides the serializes the broadcasted data,
- * divides it into smaller chunks, and stores them in the BlockManager of the driver.
- * These chunks are reported to the BlockManagerMaster so that all the executors can
- * learn the location of those chunks. The first time the broadcast variable (sent as
- * part of task) is deserialized at a executor, all the chunks are fetched using
- * the BlockManager. When all the chunks are fetched (initially from the driver's
- * BlockManager), they are combined and deserialized to recreate the broadcasted data.
- * However, the chunks are also stored in the BlockManager and reported to the
- * BlockManagerMaster. As more executors fetch the chunks, BlockManagerMaster learns
- * multiple locations for each chunk. Hence, subsequent fetches of each chunk will be
- * made to other executors who already have those chunks, resulting in a distributed
- * fetching. This prevents the driver from being the bottleneck in sending out multiple
- * copies of the broadcast data (one per executor) as done by the
- * [[org.apache.spark.broadcast.HttpBroadcast]].
+ * A BitTorrent-like implementation of [[org.apache.spark.broadcast.Broadcast]].
+ *
+ * The mechanism is as follows:
+ *
+ * The driver divides the serialized object into small chunks and
+ * stores those chunks in the BlockManager of the driver.
+ *
+ * Each executor first attempts to fetch the object from its own BlockManager. If the object is
+ * not present, it uses remote fetches to pull the small chunks from the driver and/or
+ * other executors, if available. Once it has the chunks, it puts them in its own
+ * BlockManager, ready for other executors to fetch from.
+ *
+ * This prevents the driver from being the bottleneck in sending out multiple copies of the
+ * broadcast data (one per executor) as done by the [[org.apache.spark.broadcast.HttpBroadcast]].
+ *
+ * When initialized, TorrentBroadcast objects read SparkEnv.get.conf.
+ *
+ * @param obj object to broadcast
+ * @param id A unique identifier for the broadcast variable.
*/
-private[spark] class TorrentBroadcast[T: ClassTag](
- @transient var value_ : T, isLocal: Boolean, id: Long)
+private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
extends Broadcast[T](id) with Logging with Serializable {
- def getValue = value_
+ /**
+ * Value of the broadcast object on executors. This is reconstructed by [[readBroadcastBlock]],
+ * which builds this value by reading blocks from the driver and/or other executors.
+ *
+ * On the driver, if the value is required, it is read lazily from the block manager.
+ */
+ @transient private lazy val _value: T = readBroadcastBlock()
+
+ /** The compression codec to use, or None if compression is disabled */
+ @transient private var compressionCodec: Option[CompressionCodec] = _
+ /** Size of each block. Default value is 4MB. This value is only read by the broadcaster. */
+ @transient private var blockSize: Int = _
+
+ private def setConf(conf: SparkConf) {
+ compressionCodec = if (conf.getBoolean("spark.broadcast.compress", true)) {
+ Some(CompressionCodec.createCodec(conf))
+ } else {
+ None
+ }
+ blockSize = conf.getInt("spark.broadcast.blockSize", 4096) * 1024
+ }
+ setConf(SparkEnv.get.conf)
+
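
The two settings consulted by setConf above, as a hedged configuration sketch (the values shown are the defaults read by the code):

{{{
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.broadcast.compress", "true")    // wrap each block in a CompressionCodec
  .set("spark.broadcast.blockSize", "4096")   // block size in KB; 4096 KB = 4 MB
}}}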
+ private val broadcastId = BroadcastBlockId(id)
+
+ /** Total number of blocks this broadcast variable contains. */
+ private val numBlocks: Int = writeBlocks(obj)
- val broadcastId = BroadcastBlockId(id)
+ override protected def getValue() = {
+ _value
+ }
- TorrentBroadcast.synchronized {
- SparkEnv.get.blockManager.putSingle(
- broadcastId, value_, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
+ /**
+ * Divide the object into multiple blocks and put those blocks in the block manager.
+ * @param value the object to divide
+ * @return number of blocks this broadcast variable is divided into
+ */
+ private def writeBlocks(value: T): Int = {
+ // Store a copy of the broadcast variable in the driver so that tasks run on the driver
+ // do not create a duplicate copy of the broadcast variable's value.
+ SparkEnv.get.blockManager.putSingle(broadcastId, value, StorageLevel.MEMORY_AND_DISK,
+ tellMaster = false)
+ val blocks =
+ TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec)
+ blocks.zipWithIndex.foreach { case (block, i) =>
+ SparkEnv.get.blockManager.putBytes(
+ BroadcastBlockId(id, "piece" + i),
+ block,
+ StorageLevel.MEMORY_AND_DISK_SER,
+ tellMaster = true)
+ }
+ blocks.length
}
- @transient var arrayOfBlocks: Array[TorrentBlock] = null
- @transient var totalBlocks = -1
- @transient var totalBytes = -1
- @transient var hasBlocks = 0
+ /** Fetch torrent blocks from the driver and/or other executors. */
+ private def readBlocks(): Array[ByteBuffer] = {
+ // Fetch chunks of data. Note that all these chunks are stored in the BlockManager and reported
+ // to the driver, so other executors can pull these chunks from this executor as well.
+ val blocks = new Array[ByteBuffer](numBlocks)
+ val bm = SparkEnv.get.blockManager
- if (!isLocal) {
- sendBroadcast()
+ for (pid <- Random.shuffle(Seq.range(0, numBlocks))) {
+ val pieceId = BroadcastBlockId(id, "piece" + pid)
+ logDebug(s"Reading piece $pieceId of $broadcastId")
+ // First try getLocalBytes because there is a chance that previous attempts to fetch the
+ // broadcast blocks have already fetched some of the blocks. In that case, some blocks
+ // would be available locally (on this executor).
+ def getLocal: Option[ByteBuffer] = bm.getLocalBytes(pieceId)
+ def getRemote: Option[ByteBuffer] = bm.getRemoteBytes(pieceId).map { block =>
+ // If we found the block from remote executors/driver's BlockManager, put the block
+ // in this executor's BlockManager.
+ SparkEnv.get.blockManager.putBytes(
+ pieceId,
+ block,
+ StorageLevel.MEMORY_AND_DISK_SER,
+ tellMaster = true)
+ block
+ }
+ val block: ByteBuffer = getLocal.orElse(getRemote).getOrElse(
+ throw new SparkException(s"Failed to get $pieceId of $broadcastId"))
+ blocks(pid) = block
+ }
+ blocks
}
/**
* Remove all persisted state associated with this Torrent broadcast on the executors.
*/
- def doUnpersist(blocking: Boolean) {
+ override protected def doUnpersist(blocking: Boolean) {
TorrentBroadcast.unpersist(id, removeFromDriver = false, blocking)
}
@@ -78,215 +151,79 @@ private[spark] class TorrentBroadcast[T: ClassTag](
* Remove all persisted state associated with this Torrent broadcast on the executors
* and driver.
*/
- def doDestroy(blocking: Boolean) {
+ override protected def doDestroy(blocking: Boolean) {
TorrentBroadcast.unpersist(id, removeFromDriver = true, blocking)
}
- def sendBroadcast() {
- val tInfo = TorrentBroadcast.blockifyObject(value_)
- totalBlocks = tInfo.totalBlocks
- totalBytes = tInfo.totalBytes
- hasBlocks = tInfo.totalBlocks
-
- // Store meta-info
- val metaId = BroadcastBlockId(id, "meta")
- val metaInfo = TorrentInfo(null, totalBlocks, totalBytes)
- TorrentBroadcast.synchronized {
- SparkEnv.get.blockManager.putSingle(
- metaId, metaInfo, StorageLevel.MEMORY_AND_DISK, tellMaster = true)
- }
-
- // Store individual pieces
- for (i <- 0 until totalBlocks) {
- val pieceId = BroadcastBlockId(id, "piece" + i)
- TorrentBroadcast.synchronized {
- SparkEnv.get.blockManager.putSingle(
- pieceId, tInfo.arrayOfBlocks(i), StorageLevel.MEMORY_AND_DISK, tellMaster = true)
- }
- }
- }
-
/** Used by the JVM when serializing this object. */
- private def writeObject(out: ObjectOutputStream) {
+ private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
assertValid()
out.defaultWriteObject()
}
- /** Used by the JVM when deserializing this object. */
- private def readObject(in: ObjectInputStream) {
- in.defaultReadObject()
+ private def readBroadcastBlock(): T = Utils.tryOrIOException {
TorrentBroadcast.synchronized {
- SparkEnv.get.blockManager.getSingle(broadcastId) match {
+ setConf(SparkEnv.get.conf)
+ SparkEnv.get.blockManager.getLocal(broadcastId).map(_.data.next()) match {
case Some(x) =>
- value_ = x.asInstanceOf[T]
+ x.asInstanceOf[T]
case None =>
- val start = System.nanoTime
logInfo("Started reading broadcast variable " + id)
-
- // Initialize @transient variables that will receive garbage values from the master.
- resetWorkerVariables()
-
- if (receiveBroadcast()) {
- value_ = TorrentBroadcast.unBlockifyObject[T](arrayOfBlocks, totalBytes, totalBlocks)
-
- /* Store the merged copy in cache so that the next worker doesn't need to rebuild it.
- * This creates a trade-off between memory usage and latency. Storing copy doubles
- * the memory footprint; not storing doubles deserialization cost. Also,
- * this does not need to be reported to BlockManagerMaster since other executors
- * does not need to access this block (they only need to fetch the chunks,
- * which are reported).
- */
- SparkEnv.get.blockManager.putSingle(
- broadcastId, value_, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
-
- // Remove arrayOfBlocks from memory once value_ is on local cache
- resetWorkerVariables()
- } else {
- logError("Reading broadcast variable " + id + " failed")
- }
-
- val time = (System.nanoTime - start) / 1e9
- logInfo("Reading broadcast variable " + id + " took " + time + " s")
+ val startTimeMs = System.currentTimeMillis()
+ val blocks = readBlocks()
+ logInfo("Reading broadcast variable " + id + " took" + Utils.getUsedTimeMs(startTimeMs))
+
+ val obj = TorrentBroadcast.unBlockifyObject[T](
+ blocks, SparkEnv.get.serializer, compressionCodec)
+ // Store the merged copy in BlockManager so other tasks on this executor don't
+ // need to re-fetch it.
+ SparkEnv.get.blockManager.putSingle(
+ broadcastId, obj, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
+ obj
}
}
}
- private def resetWorkerVariables() {
- arrayOfBlocks = null
- totalBytes = -1
- totalBlocks = -1
- hasBlocks = 0
- }
-
- def receiveBroadcast(): Boolean = {
- // Receive meta-info about the size of broadcast data,
- // the number of chunks it is divided into, etc.
- val metaId = BroadcastBlockId(id, "meta")
- var attemptId = 10
- while (attemptId > 0 && totalBlocks == -1) {
- TorrentBroadcast.synchronized {
- SparkEnv.get.blockManager.getSingle(metaId) match {
- case Some(x) =>
- val tInfo = x.asInstanceOf[TorrentInfo]
- totalBlocks = tInfo.totalBlocks
- totalBytes = tInfo.totalBytes
- arrayOfBlocks = new Array[TorrentBlock](totalBlocks)
- hasBlocks = 0
-
- case None =>
- Thread.sleep(500)
- }
- }
- attemptId -= 1
- }
- if (totalBlocks == -1) {
- return false
- }
-
- /*
- * Fetch actual chunks of data. Note that all these chunks are stored in
- * the BlockManager and reported to the master, so that other executors
- * can find out and pull the chunks from this executor.
- */
- val recvOrder = new Random().shuffle(Array.iterate(0, totalBlocks)(_ + 1).toList)
- for (pid <- recvOrder) {
- val pieceId = BroadcastBlockId(id, "piece" + pid)
- TorrentBroadcast.synchronized {
- SparkEnv.get.blockManager.getSingle(pieceId) match {
- case Some(x) =>
- arrayOfBlocks(pid) = x.asInstanceOf[TorrentBlock]
- hasBlocks += 1
- SparkEnv.get.blockManager.putSingle(
- pieceId, arrayOfBlocks(pid), StorageLevel.MEMORY_AND_DISK, tellMaster = true)
-
- case None =>
- throw new SparkException("Failed to get " + pieceId + " of " + broadcastId)
- }
- }
- }
-
- hasBlocks == totalBlocks
- }
-
}
-private[spark] object TorrentBroadcast extends Logging {
- private lazy val BLOCK_SIZE = conf.getInt("spark.broadcast.blockSize", 4096) * 1024
- private var initialized = false
- private var conf: SparkConf = null
- def initialize(_isDriver: Boolean, conf: SparkConf) {
- TorrentBroadcast.conf = conf // TODO: we might have to fix it in tests
- synchronized {
- if (!initialized) {
- initialized = true
- }
- }
- }
+private object TorrentBroadcast extends Logging {
- def stop() {
- initialized = false
+ def blockifyObject[T: ClassTag](
+ obj: T,
+ blockSize: Int,
+ serializer: Serializer,
+ compressionCodec: Option[CompressionCodec]): Array[ByteBuffer] = {
+ val bos = new ByteArrayChunkOutputStream(blockSize)
+ val out: OutputStream = compressionCodec.map(c => c.compressedOutputStream(bos)).getOrElse(bos)
+ val ser = serializer.newInstance()
+ val serOut = ser.serializeStream(out)
+ serOut.writeObject[T](obj).close()
+ bos.toArrays.map(ByteBuffer.wrap)
}
- def blockifyObject[T](obj: T): TorrentInfo = {
- val byteArray = Utils.serialize[T](obj)
- val bais = new ByteArrayInputStream(byteArray)
-
- var blockNum = byteArray.length / BLOCK_SIZE
- if (byteArray.length % BLOCK_SIZE != 0) {
- blockNum += 1
- }
-
- val blocks = new Array[TorrentBlock](blockNum)
- var blockId = 0
-
- for (i <- 0 until (byteArray.length, BLOCK_SIZE)) {
- val thisBlockSize = math.min(BLOCK_SIZE, byteArray.length - i)
- val tempByteArray = new Array[Byte](thisBlockSize)
- bais.read(tempByteArray, 0, thisBlockSize)
-
- blocks(blockId) = new TorrentBlock(blockId, tempByteArray)
- blockId += 1
- }
- bais.close()
-
- val info = TorrentInfo(blocks, blockNum, byteArray.length)
- info.hasBlocks = blockNum
- info
- }
-
- def unBlockifyObject[T](
- arrayOfBlocks: Array[TorrentBlock],
- totalBytes: Int,
- totalBlocks: Int): T = {
- val retByteArray = new Array[Byte](totalBytes)
- for (i <- 0 until totalBlocks) {
- System.arraycopy(arrayOfBlocks(i).byteArray, 0, retByteArray,
- i * BLOCK_SIZE, arrayOfBlocks(i).byteArray.length)
- }
- Utils.deserialize[T](retByteArray, Thread.currentThread.getContextClassLoader)
+ def unBlockifyObject[T: ClassTag](
+ blocks: Array[ByteBuffer],
+ serializer: Serializer,
+ compressionCodec: Option[CompressionCodec]): T = {
+ require(blocks.nonEmpty, "Cannot unblockify an empty array of blocks")
+ val is = new SequenceInputStream(
+ asJavaEnumeration(blocks.iterator.map(block => new ByteBufferInputStream(block))))
+ val in: InputStream = compressionCodec.map(c => c.compressedInputStream(is)).getOrElse(is)
+ val ser = serializer.newInstance()
+ val serIn = ser.deserializeStream(in)
+ val obj = serIn.readObject[T]()
+ serIn.close()
+ obj
}
/**
* Remove all persisted blocks associated with this torrent broadcast on the executors.
* If removeFromDriver is true, also remove these persisted blocks on the driver.
*/
- def unpersist(id: Long, removeFromDriver: Boolean, blocking: Boolean) = synchronized {
+ def unpersist(id: Long, removeFromDriver: Boolean, blocking: Boolean) = {
+ logDebug(s"Unpersisting TorrentBroadcast $id")
SparkEnv.get.blockManager.master.removeBroadcast(id, removeFromDriver, blocking)
}
}
-
-private[spark] case class TorrentBlock(
- blockID: Int,
- byteArray: Array[Byte])
- extends Serializable
-
-private[spark] case class TorrentInfo(
- @transient arrayOfBlocks: Array[TorrentBlock],
- totalBlocks: Int,
- totalBytes: Int)
- extends Serializable {
-
- @transient var hasBlocks = 0
-}
diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala
index 1de8396a0e17f..fb024c12094f2 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala
@@ -28,21 +28,20 @@ import org.apache.spark.{SecurityManager, SparkConf}
*/
class TorrentBroadcastFactory extends BroadcastFactory {
- def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) {
- TorrentBroadcast.initialize(isDriver, conf)
- }
+ override def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) { }
- def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean, id: Long) =
- new TorrentBroadcast[T](value_, isLocal, id)
+ override def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean, id: Long) = {
+ new TorrentBroadcast[T](value_, id)
+ }
- def stop() { TorrentBroadcast.stop() }
+ override def stop() { }
/**
* Remove all persisted state associated with the torrent broadcast with the given ID.
* @param removeFromDriver Whether to remove state from the driver.
* @param blocking Whether to block until unbroadcasted
*/
- def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean) {
+ override def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean) {
TorrentBroadcast.unpersist(id, removeFromDriver, blocking)
}
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
index 86305d2ea8a09..65a1a8fd7e929 100644
--- a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
@@ -22,7 +22,6 @@ private[spark] class ApplicationDescription(
val maxCores: Option[Int],
val memoryPerSlave: Int,
val command: Command,
- val sparkHome: Option[String],
var appUiUrl: String,
val eventLogDir: Option[String] = None)
extends Serializable {
diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala
index c371dc3a51c73..f2687ce6b42b4 100644
--- a/core/src/main/scala/org/apache/spark/deploy/Client.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala
@@ -17,8 +17,6 @@
package org.apache.spark.deploy
-import scala.collection.JavaConversions._
-import scala.collection.mutable.Map
import scala.concurrent._
import akka.actor._
@@ -29,12 +27,14 @@ import org.apache.log4j.{Level, Logger}
import org.apache.spark.{Logging, SecurityManager, SparkConf}
import org.apache.spark.deploy.DeployMessages._
import org.apache.spark.deploy.master.{DriverState, Master}
-import org.apache.spark.util.{AkkaUtils, Utils}
+import org.apache.spark.util.{ActorLogReceive, AkkaUtils, Utils}
/**
* Proxy that relays messages to the driver.
*/
-private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends Actor with Logging {
+private class ClientActor(driverArgs: ClientArguments, conf: SparkConf)
+ extends Actor with ActorLogReceive with Logging {
+
var masterActor: ActorSelection = _
val timeout = AkkaUtils.askTimeout(conf)
@@ -50,9 +50,6 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends
// TODO: We could add an env variable here and intercept it in `sc.addJar` that would
// truncate filesystem paths similar to what YARN does. For now, we just require
// people call `addJar` assuming the jar is in the same directory.
- val env = Map[String, String]()
- System.getenv().foreach{case (k, v) => env(k) = v}
-
val mainClass = "org.apache.spark.deploy.worker.DriverWrapper"
val classPathConf = "spark.driver.extraClassPath"
@@ -65,10 +62,13 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends
cp.split(java.io.File.pathSeparator)
}
- val javaOptionsConf = "spark.driver.extraJavaOptions"
- val javaOpts = sys.props.get(javaOptionsConf)
+ val extraJavaOptsConf = "spark.driver.extraJavaOptions"
+ val extraJavaOpts = sys.props.get(extraJavaOptsConf)
+ .map(Utils.splitCommandString).getOrElse(Seq.empty)
+ val sparkJavaOpts = Utils.sparkJavaOpts(conf)
+ val javaOpts = sparkJavaOpts ++ extraJavaOpts
val command = new Command(mainClass, Seq("{{WORKER_URL}}", driverArgs.mainClass) ++
- driverArgs.driverOptions, env, classPathEntries, libraryPathEntries, javaOpts)
+ driverArgs.driverOptions, sys.env, classPathEntries, libraryPathEntries, javaOpts)
val driverDescription = new DriverDescription(
driverArgs.jarUrl,
@@ -109,13 +109,14 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends
// Exception, if present
statusResponse.exception.map { e =>
println(s"Exception from cluster was: $e")
+ e.printStackTrace()
System.exit(-1)
}
System.exit(0)
}
}
- override def receive = {
+ override def receiveWithLogging = {
case SubmitDriverResponse(success, driverId, message) =>
println(message)
@@ -129,7 +130,7 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends
println(s"Error connecting to master ${driverArgs.master} ($remoteAddress), exiting.")
System.exit(-1)
- case AssociationErrorEvent(cause, _, remoteAddress, _) =>
+ case AssociationErrorEvent(cause, _, remoteAddress, _, _) =>
println(s"Error connecting to master ${driverArgs.master} ($remoteAddress), exiting.")
println(s"Cause was: $cause")
System.exit(-1)
@@ -141,8 +142,10 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends
*/
object Client {
def main(args: Array[String]) {
- println("WARNING: This client is deprecated and will be removed in a future version of Spark.")
- println("Use ./bin/spark-submit with \"--master spark://host:port\"")
+ if (!sys.props.contains("SPARK_SUBMIT")) {
+ println("WARNING: This client is deprecated and will be removed in a future version of Spark")
+ println("Use ./bin/spark-submit with \"--master spark://host:port\"")
+ }
val conf = new SparkConf()
val driverArgs = new ClientArguments(args)
@@ -154,8 +157,6 @@ object Client {
conf.set("akka.loglevel", driverArgs.logLevel.toString.replace("WARN", "WARNING"))
Logger.getRootLogger.setLevel(driverArgs.logLevel)
- // TODO: See if we can initialize akka so return messages are sent back using the same TCP
- // flow. Else, this (sadly) requires the DriverClient be routable from the Master.
val (actorSystem, _) = AkkaUtils.createActorSystem(
"driverClient", Utils.localHostName(), 0, conf, new SecurityManager(conf))
diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala
index 39150deab863c..4e802e02c4149 100644
--- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala
@@ -17,6 +17,8 @@
package org.apache.spark.deploy
+import java.net.{URI, URISyntaxException}
+
import scala.collection.mutable.ListBuffer
import org.apache.log4j.Level
@@ -114,5 +116,12 @@ private[spark] class ClientArguments(args: Array[String]) {
}
object ClientArguments {
- def isValidJarUrl(s: String): Boolean = s.matches("(.+):(.+)jar")
+ def isValidJarUrl(s: String): Boolean = {
+ try {
+ val uri = new URI(s)
+ uri.getScheme != null && uri.getAuthority != null && s.endsWith("jar")
+ } catch {
+ case _: URISyntaxException => false
+ }
+ }
}
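
A hedged sketch of what the URI-based check accepts and rejects (the paths are illustrative):

{{{
ClientArguments.isValidJarUrl("hdfs://namenode:8020/jars/app.jar")   // true: scheme, authority, ends in jar
ClientArguments.isValidJarUrl("file:/local/app.jar")                 // false: no authority
ClientArguments.isValidJarUrl("/local/app.jar")                      // false: no scheme
ClientArguments.isValidJarUrl("hdfs://namenode:8020/jars/app.war")   // false: does not end in "jar"
}}}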
diff --git a/core/src/main/scala/org/apache/spark/deploy/Command.scala b/core/src/main/scala/org/apache/spark/deploy/Command.scala
index 32f3ba385084f..a2b263544c6a2 100644
--- a/core/src/main/scala/org/apache/spark/deploy/Command.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/Command.scala
@@ -25,5 +25,5 @@ private[spark] case class Command(
environment: Map[String, String],
classPathEntries: Seq[String],
libraryPathEntries: Seq[String],
- extraJavaOptions: Option[String] = None) {
+ javaOpts: Seq[String]) {
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala
index a7368f9f3dfbe..b9dd8557ee904 100644
--- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala
@@ -71,6 +71,8 @@ private[deploy] object DeployMessages {
case class RegisterWorkerFailed(message: String) extends DeployMessage
+ case class ReconnectWorker(masterUrl: String) extends DeployMessage
+
case class KillExecutor(masterUrl: String, appId: String, execId: Int) extends DeployMessage
case class LaunchExecutor(
diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
index c4f5e294a393e..696f32a6f5730 100644
--- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
@@ -56,7 +56,6 @@ private[spark] object JsonProtocol {
("cores" -> obj.maxCores) ~
("memoryperslave" -> obj.memoryPerSlave) ~
("user" -> obj.user) ~
- ("sparkhome" -> obj.sparkHome) ~
("command" -> obj.command.toString)
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
index 0d6751f3fa6d2..039c8719e2867 100644
--- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
@@ -22,8 +22,8 @@ import java.net.URI
import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConversions._
-import org.apache.spark.api.python.{PythonUtils, RedirectThread}
-import org.apache.spark.util.Utils
+import org.apache.spark.api.python.PythonUtils
+import org.apache.spark.util.{RedirectThread, Utils}
/**
* A main class used by spark-submit to launch Python applications. It executes python as a
@@ -34,7 +34,8 @@ object PythonRunner {
val pythonFile = args(0)
val pyFiles = args(1)
val otherArgs = args.slice(2, args.length)
- val pythonExec = sys.env.get("PYSPARK_PYTHON").getOrElse("python") // TODO: get this from conf
+ val pythonExec =
+ sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", "python"))
// Format python file paths before adding them to the PYTHONPATH
val formattedPythonFile = formatPath(pythonFile)
@@ -54,9 +55,11 @@ object PythonRunner {
val pythonPath = PythonUtils.mergePythonPaths(pathElements: _*)
// Launch Python process
- val builder = new ProcessBuilder(Seq(pythonExec, "-u", formattedPythonFile) ++ otherArgs)
+ val builder = new ProcessBuilder(Seq(pythonExec, formattedPythonFile) ++ otherArgs)
val env = builder.environment()
env.put("PYTHONPATH", pythonPath)
+ // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
+ env.put("PYTHONUNBUFFERED", "YES") // value is needed to be set to a non-empty string
env.put("PYSPARK_GATEWAY_PORT", "" + gatewayServer.getListeningPort)
builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize
val process = builder.start()
@@ -84,8 +87,8 @@ object PythonRunner {
// Strip the URI scheme from the path
formattedPath =
new URI(formattedPath).getScheme match {
- case Utils.windowsDrive(d) if windows => formattedPath
case null => formattedPath
+ case Utils.windowsDrive(d) if windows => formattedPath
case _ => new URI(formattedPath).getPath
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 148115d3ed351..60ee115e393ce 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -17,22 +17,29 @@
package org.apache.spark.deploy
+import java.lang.reflect.Method
import java.security.PrivilegedExceptionAction
import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.FileSystem.Statistics
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.UserGroupInformation
-import org.apache.spark.{Logging, SparkContext, SparkException}
+import org.apache.spark.{Logging, SparkContext, SparkConf, SparkException}
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.util.Utils
import scala.collection.JavaConversions._
/**
+ * :: DeveloperApi ::
* Contains util methods to interact with Hadoop from Spark.
*/
+@DeveloperApi
class SparkHadoopUtil extends Logging {
- val conf: Configuration = newConfiguration()
+ val conf: Configuration = newConfiguration(new SparkConf())
UserGroupInformation.setConfiguration(conf)
/**
@@ -64,11 +71,39 @@ class SparkHadoopUtil extends Logging {
}
}
+ @Deprecated
+ def newConfiguration(): Configuration = newConfiguration(null)
+
/**
* Return an appropriate (subclass) of Configuration. Creating config can initializes some Hadoop
* subsystems.
*/
- def newConfiguration(): Configuration = new Configuration()
+ def newConfiguration(conf: SparkConf): Configuration = {
+ val hadoopConf = new Configuration()
+
+ // Note: this null check is around more than just access to the "conf" object to maintain
+ // the behavior of the old implementation of this code, for backwards compatibility.
+ if (conf != null) {
+ // Explicitly check for S3 environment variables
+ if (System.getenv("AWS_ACCESS_KEY_ID") != null &&
+ System.getenv("AWS_SECRET_ACCESS_KEY") != null) {
+ hadoopConf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID"))
+ hadoopConf.set("fs.s3n.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID"))
+ hadoopConf.set("fs.s3.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY"))
+ hadoopConf.set("fs.s3n.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY"))
+ }
+ // Copy any "spark.hadoop.foo=bar" system properties into conf as "foo=bar"
+ conf.getAll.foreach { case (key, value) =>
+ if (key.startsWith("spark.hadoop.")) {
+ hadoopConf.set(key.substring("spark.hadoop.".length), value)
+ }
+ }
+ val bufferSize = conf.get("spark.buffer.size", "65536")
+ hadoopConf.set("io.file.buffer.size", bufferSize)
+ }
+
+ hadoopConf
+ }
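
A hedged sketch of the spark.hadoop.* pass-through added above; the split-size key is just an arbitrary Hadoop property chosen for illustration:

{{{
import org.apache.spark.SparkConf

val sparkConf = new SparkConf()
  .set("spark.hadoop.mapreduce.input.fileinputformat.split.minsize", "134217728")
val hadoopConf = new SparkHadoopUtil().newConfiguration(sparkConf)

hadoopConf.get("mapreduce.input.fileinputformat.split.minsize")   // "134217728"
hadoopConf.get("io.file.buffer.size")                             // "65536", from spark.buffer.size
}}}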
/**
* Add any user credentials to the job conf which are necessary for running on a secure Hadoop
@@ -86,10 +121,68 @@ class SparkHadoopUtil extends Logging {
def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { null }
- def loginUserFromKeytab(principalName: String, keytabFilename: String) {
+ def loginUserFromKeytab(principalName: String, keytabFilename: String) {
UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename)
}
+ /**
+ * Returns a function that can be called to find Hadoop FileSystem bytes read. If
+ * getFSBytesReadOnThreadCallback is called from thread r at time t, the returned callback will
+ * return the bytes read on r since t. Reflection is required because thread-level FileSystem
+ * statistics are only available as of Hadoop 2.5 (see HADOOP-10688).
+ * Returns None if the required method can't be found.
+ */
+ private[spark] def getFSBytesReadOnThreadCallback(path: Path, conf: Configuration)
+ : Option[() => Long] = {
+ try {
+ val threadStats = getFileSystemThreadStatistics(path, conf)
+ val getBytesReadMethod = getFileSystemThreadStatisticsMethod("getBytesRead")
+ val f = () => threadStats.map(getBytesReadMethod.invoke(_).asInstanceOf[Long]).sum
+ val baselineBytesRead = f()
+ Some(() => f() - baselineBytesRead)
+ } catch {
+ case e: NoSuchMethodException => {
+ logDebug("Couldn't find method for retrieving thread-level FileSystem input data", e)
+ None
+ }
+ }
+ }
+
+ /**
+ * Returns a function that can be called to find Hadoop FileSystem bytes written. If
+ * getFSBytesWrittenOnThreadCallback is called from thread r at time t, the returned callback will
+ * return the bytes written on r since t. Reflection is required because thread-level FileSystem
+ * statistics are only available as of Hadoop 2.5 (see HADOOP-10688).
+ * Returns None if the required method can't be found.
+ */
+ private[spark] def getFSBytesWrittenOnThreadCallback(path: Path, conf: Configuration)
+ : Option[() => Long] = {
+ try {
+ val threadStats = getFileSystemThreadStatistics(path, conf)
+ val getBytesWrittenMethod = getFileSystemThreadStatisticsMethod("getBytesWritten")
+ val f = () => threadStats.map(getBytesWrittenMethod.invoke(_).asInstanceOf[Long]).sum
+ val baselineBytesWritten = f()
+ Some(() => f() - baselineBytesWritten)
+ } catch {
+ case e: NoSuchMethodException => {
+ logDebug("Couldn't find method for retrieving thread-level FileSystem output data", e)
+ None
+ }
+ }
+ }
+
+ private def getFileSystemThreadStatistics(path: Path, conf: Configuration): Seq[AnyRef] = {
+ val qualifiedPath = path.getFileSystem(conf).makeQualified(path)
+ val scheme = qualifiedPath.toUri().getScheme()
+ val stats = FileSystem.getAllStatistics().filter(_.getScheme().equals(scheme))
+ stats.map(Utils.invoke(classOf[Statistics], _, "getThreadStatistics"))
+ }
+
+ private def getFileSystemThreadStatisticsMethod(methodName: String): Method = {
+ val statisticsDataClass =
+ Class.forName("org.apache.hadoop.fs.FileSystem$Statistics$StatisticsData")
+ statisticsDataClass.getDeclaredMethod(methodName)
+ }
}
object SparkHadoopUtil {
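
As a rough usage sketch of the thread-level statistics callbacks added above: the helpers are `private[spark]`, so only Spark-internal code can call them, and they return `Some(callback)` only on Hadoop 2.5+ where the reflective lookup succeeds.
{{{
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.deploy.SparkHadoopUtil

val conf = new Configuration()
val path = new Path("hdfs://a-hdfs-path/part-00000")

// None on Hadoop versions without thread-level FileSystem statistics.
val maybeBytesRead = SparkHadoopUtil.get.getFSBytesReadOnThreadCallback(path, conf)

// ... perform reads on this thread ...

maybeBytesRead.foreach { bytesRead =>
  println(s"Bytes read on this thread since the callback was created: ${bytesRead()}")
}
}}}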
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index b050dccb6d57f..8a62519bd2315 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -18,7 +18,7 @@
package org.apache.spark.deploy
import java.io.{File, PrintStream}
-import java.lang.reflect.InvocationTargetException
+import java.lang.reflect.{Modifier, InvocationTargetException}
import java.net.URL
import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
@@ -27,116 +27,126 @@ import org.apache.spark.executor.ExecutorURLClassLoader
import org.apache.spark.util.Utils
/**
- * Scala code behind the spark-submit script. The script handles setting up the classpath with
- * relevant Spark dependencies and provides a layer over the different cluster managers and deploy
- * modes that Spark supports.
+ * Main gateway of launching a Spark application.
+ *
+ * This program handles setting up the classpath with relevant Spark dependencies and provides
+ * a layer over the different cluster managers and deploy modes that Spark supports.
*/
object SparkSubmit {
+
+ // Cluster managers
private val YARN = 1
private val STANDALONE = 2
private val MESOS = 4
private val LOCAL = 8
private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL
- private var clusterManager: Int = LOCAL
+ // Deploy modes
+ private val CLIENT = 1
+ private val CLUSTER = 2
+ private val ALL_DEPLOY_MODES = CLIENT | CLUSTER
- /**
- * Special primary resource names that represent shells rather than application jars.
- */
+ // A special jar name that indicates the class being run is inside of Spark itself, and therefore
+ // no user jar is needed.
+ private val SPARK_INTERNAL = "spark-internal"
+
+ // Special primary resource names that represent shells rather than application jars.
private val SPARK_SHELL = "spark-shell"
private val PYSPARK_SHELL = "pyspark-shell"
- def main(args: Array[String]) {
- val appArgs = new SparkSubmitArguments(args)
- if (appArgs.verbose) {
- printStream.println(appArgs)
- }
- val (childArgs, classpath, sysProps, mainClass) = createLaunchEnv(appArgs)
- launch(childArgs, classpath, sysProps, mainClass, appArgs.verbose)
- }
+ private val CLASS_NOT_FOUND_EXIT_STATUS = 101
// Exposed for testing
- private[spark] var printStream: PrintStream = System.err
private[spark] var exitFn: () => Unit = () => System.exit(-1)
-
+ private[spark] var printStream: PrintStream = System.err
+ private[spark] def printWarning(str: String) = printStream.println("Warning: " + str)
private[spark] def printErrorAndExit(str: String) = {
printStream.println("Error: " + str)
printStream.println("Run with --help for usage help or --verbose for debug output")
exitFn()
}
- private[spark] def printWarning(str: String) = printStream.println("Warning: " + str)
+
+ def main(args: Array[String]) {
+ val appArgs = new SparkSubmitArguments(args)
+ if (appArgs.verbose) {
+ printStream.println(appArgs)
+ }
+ val (childArgs, classpath, sysProps, mainClass) = createLaunchEnv(appArgs)
+ launch(childArgs, classpath, sysProps, mainClass, appArgs.verbose)
+ }
/**
- * @return a tuple containing the arguments for the child, a list of classpath
- * entries for the child, a list of system properties, a list of env vars
- * and the main class for the child
+ * @return a tuple containing
+ * (1) the arguments for the child process,
+ * (2) a list of classpath entries for the child,
+ * (3) a list of system properties and env vars, and
+ * (4) the main class for the child
*/
private[spark] def createLaunchEnv(args: SparkSubmitArguments)
: (ArrayBuffer[String], ArrayBuffer[String], Map[String, String], String) = {
- if (args.master.startsWith("local")) {
- clusterManager = LOCAL
- } else if (args.master.startsWith("yarn")) {
- clusterManager = YARN
- } else if (args.master.startsWith("spark")) {
- clusterManager = STANDALONE
- } else if (args.master.startsWith("mesos")) {
- clusterManager = MESOS
- } else {
- printErrorAndExit("Master must start with yarn, mesos, spark, or local")
- }
-
- // Because "yarn-cluster" and "yarn-client" encapsulate both the master
- // and deploy mode, we have some logic to infer the master and deploy mode
- // from each other if only one is specified, or exit early if they are at odds.
- if (args.deployMode == null &&
- (args.master == "yarn-standalone" || args.master == "yarn-cluster")) {
- args.deployMode = "cluster"
- }
- if (args.deployMode == "cluster" && args.master == "yarn-client") {
- printErrorAndExit("Deploy mode \"cluster\" and master \"yarn-client\" are not compatible")
- }
- if (args.deployMode == "client" &&
- (args.master == "yarn-standalone" || args.master == "yarn-cluster")) {
- printErrorAndExit("Deploy mode \"client\" and master \"" + args.master
- + "\" are not compatible")
- }
- if (args.deployMode == "cluster" && args.master.startsWith("yarn")) {
- args.master = "yarn-cluster"
- }
- if (args.deployMode != "cluster" && args.master.startsWith("yarn")) {
- args.master = "yarn-client"
- }
- val deployOnCluster = Option(args.deployMode).getOrElse("client") == "cluster"
-
- val childClasspath = new ArrayBuffer[String]()
+ // Values to return
val childArgs = new ArrayBuffer[String]()
+ val childClasspath = new ArrayBuffer[String]()
val sysProps = new HashMap[String, String]()
var childMainClass = ""
- val isPython = args.isPython
- val isYarnCluster = clusterManager == YARN && deployOnCluster
+ // Set the cluster manager
+ val clusterManager: Int = args.master match {
+ case m if m.startsWith("yarn") => YARN
+ case m if m.startsWith("spark") => STANDALONE
+ case m if m.startsWith("mesos") => MESOS
+ case m if m.startsWith("local") => LOCAL
+ case _ => printErrorAndExit("Master must start with yarn, spark, mesos, or local"); -1
+ }
- // For mesos, only client mode is supported
- if (clusterManager == MESOS && deployOnCluster) {
- printErrorAndExit("Cluster deploy mode is currently not supported for Mesos clusters.")
+ // Set the deploy mode; default is client mode
+ var deployMode: Int = args.deployMode match {
+ case "client" | null => CLIENT
+ case "cluster" => CLUSTER
+ case _ => printErrorAndExit("Deploy mode must be either client or cluster"); -1
}
- // For standalone, only client mode is supported
- if (clusterManager == STANDALONE && deployOnCluster) {
- printErrorAndExit("Cluster deploy mode is currently not supported for standalone clusters.")
+ // Because "yarn-cluster" and "yarn-client" encapsulate both the master
+ // and deploy mode, we have some logic to infer the master and deploy mode
+ // from each other if only one is specified, or exit early if they are at odds.
+ if (clusterManager == YARN) {
+ if (args.master == "yarn-standalone") {
+ printWarning("\"yarn-standalone\" is deprecated. Use \"yarn-cluster\" instead.")
+ args.master = "yarn-cluster"
+ }
+ (args.master, args.deployMode) match {
+ case ("yarn-cluster", null) =>
+ deployMode = CLUSTER
+ case ("yarn-cluster", "client") =>
+ printErrorAndExit("Client deploy mode is not compatible with master \"yarn-cluster\"")
+ case ("yarn-client", "cluster") =>
+ printErrorAndExit("Cluster deploy mode is not compatible with master \"yarn-client\"")
+ case (_, mode) =>
+ args.master = "yarn-" + Option(mode).getOrElse("client")
+ }
+
+ // Make sure YARN is included in our build if we're trying to use it
+ if (!Utils.classIsLoadable("org.apache.spark.deploy.yarn.Client") && !Utils.isTesting) {
+ printErrorAndExit(
+ "Could not load YARN classes. " +
+ "This copy of Spark may not have been compiled with YARN support.")
+ }
}
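
The match above normalizes the YARN master/deploy-mode combinations. A standalone, illustrative sketch of the same rules (not the actual SparkSubmit code, which mutates `args` in place):
{{{
// Returns (normalized master, effective deploy mode); errors on incompatible pairs.
def normalizeYarn(master: String, deployMode: Option[String]): (String, String) = {
  val m = if (master == "yarn-standalone") "yarn-cluster" else master
  (m, deployMode) match {
    case ("yarn-cluster", None)           => ("yarn-cluster", "cluster")
    case ("yarn-cluster", Some("client")) => sys.error("client mode incompatible with yarn-cluster")
    case ("yarn-client", Some("cluster")) => sys.error("cluster mode incompatible with yarn-client")
    case (_, mode)                        => ("yarn-" + mode.getOrElse("client"), mode.getOrElse("client"))
  }
}

assert(normalizeYarn("yarn", None) == ("yarn-client", "client"))
assert(normalizeYarn("yarn-standalone", None) == ("yarn-cluster", "cluster"))
}}}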
- // For shells, only client mode is applicable
- if (isShell(args.primaryResource) && deployOnCluster) {
- printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.")
+ // The following modes are not supported or applicable
+ (clusterManager, deployMode) match {
+ case (MESOS, CLUSTER) =>
+ printErrorAndExit("Cluster deploy mode is currently not supported for Mesos clusters.")
+ case (_, CLUSTER) if args.isPython =>
+ printErrorAndExit("Cluster deploy mode is currently not supported for python applications.")
+ case (_, CLUSTER) if isShell(args.primaryResource) =>
+ printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.")
+ case _ =>
}
// If we're running a python app, set the main class to our specific python runner
- if (isPython) {
- if (deployOnCluster) {
- printErrorAndExit("Cluster deploy mode is currently not supported for python.")
- }
+ if (args.isPython) {
if (args.primaryResource == PYSPARK_SHELL) {
args.mainClass = "py4j.GatewayServer"
args.childArgs = ArrayBuffer("--die-on-broken-pipe", "0")
@@ -148,27 +158,8 @@ object SparkSubmit {
args.files = mergeFileLists(args.files, args.primaryResource)
}
args.files = mergeFileLists(args.files, args.pyFiles)
- // Format python file paths properly before adding them to the PYTHONPATH
- sysProps("spark.submit.pyFiles") = PythonRunner.formatPaths(args.pyFiles).mkString(",")
- }
-
- // If we're deploying into YARN, use yarn.Client as a wrapper around the user class
- if (!deployOnCluster) {
- childMainClass = args.mainClass
- if (isUserJar(args.primaryResource)) {
- childClasspath += args.primaryResource
- }
- } else if (clusterManager == YARN) {
- childMainClass = "org.apache.spark.deploy.yarn.Client"
- childArgs += ("--jar", args.primaryResource)
- childArgs += ("--class", args.mainClass)
- }
-
- // Make sure YARN is included in our build if we're trying to use it
- if (clusterManager == YARN) {
- if (!Utils.classIsLoadable("org.apache.spark.deploy.yarn.Client") && !Utils.isTesting) {
- printErrorAndExit("Could not load YARN classes. " +
- "This copy of Spark may not have been compiled with YARN support.")
+ if (args.pyFiles != null) {
+ sysProps("spark.submit.pyFiles") = args.pyFiles
}
}
@@ -178,94 +169,137 @@ object SparkSubmit {
// A list of rules to map each argument to system properties or command-line options in
// each deploy mode; we iterate through these below
val options = List[OptionAssigner](
- OptionAssigner(args.master, ALL_CLUSTER_MGRS, false, sysProp = "spark.master"),
- OptionAssigner(args.name, ALL_CLUSTER_MGRS, false, sysProp = "spark.app.name"),
- OptionAssigner(args.name, YARN, true, clOption = "--name", sysProp = "spark.app.name"),
- OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, true,
+
+ // All cluster managers
+ OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"),
+ OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"),
+ OptionAssigner(args.jars, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars"),
+ OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT,
+ sysProp = "spark.driver.memory"),
+ OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
sysProp = "spark.driver.extraClassPath"),
- OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, true,
+ OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
sysProp = "spark.driver.extraJavaOptions"),
- OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, true,
+ OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
sysProp = "spark.driver.extraLibraryPath"),
- OptionAssigner(args.driverMemory, YARN, true, clOption = "--driver-memory"),
- OptionAssigner(args.driverMemory, STANDALONE, true, clOption = "--memory"),
- OptionAssigner(args.driverCores, STANDALONE, true, clOption = "--cores"),
- OptionAssigner(args.queue, YARN, true, clOption = "--queue"),
- OptionAssigner(args.queue, YARN, false, sysProp = "spark.yarn.queue"),
- OptionAssigner(args.numExecutors, YARN, true, clOption = "--num-executors"),
- OptionAssigner(args.numExecutors, YARN, false, sysProp = "spark.executor.instances"),
- OptionAssigner(args.executorMemory, YARN, true, clOption = "--executor-memory"),
- OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, false,
+
+ // Standalone cluster only
+ OptionAssigner(args.jars, STANDALONE, CLUSTER, sysProp = "spark.jars"),
+ OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"),
+ OptionAssigner(args.driverCores, STANDALONE, CLUSTER, clOption = "--cores"),
+
+ // Yarn client only
+ OptionAssigner(args.queue, YARN, CLIENT, sysProp = "spark.yarn.queue"),
+ OptionAssigner(args.numExecutors, YARN, CLIENT, sysProp = "spark.executor.instances"),
+ OptionAssigner(args.executorCores, YARN, CLIENT, sysProp = "spark.executor.cores"),
+ OptionAssigner(args.files, YARN, CLIENT, sysProp = "spark.yarn.dist.files"),
+ OptionAssigner(args.archives, YARN, CLIENT, sysProp = "spark.yarn.dist.archives"),
+
+ // Yarn cluster only
+ OptionAssigner(args.name, YARN, CLUSTER, clOption = "--name"),
+ OptionAssigner(args.driverMemory, YARN, CLUSTER, clOption = "--driver-memory"),
+ OptionAssigner(args.queue, YARN, CLUSTER, clOption = "--queue"),
+ OptionAssigner(args.numExecutors, YARN, CLUSTER, clOption = "--num-executors"),
+ OptionAssigner(args.executorMemory, YARN, CLUSTER, clOption = "--executor-memory"),
+ OptionAssigner(args.executorCores, YARN, CLUSTER, clOption = "--executor-cores"),
+ OptionAssigner(args.files, YARN, CLUSTER, clOption = "--files"),
+ OptionAssigner(args.archives, YARN, CLUSTER, clOption = "--archives"),
+ OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),
+
+ // Other options
+ OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,
sysProp = "spark.executor.memory"),
- OptionAssigner(args.executorCores, YARN, true, clOption = "--executor-cores"),
- OptionAssigner(args.executorCores, YARN, false, sysProp = "spark.executor.cores"),
- OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, false,
+ OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
sysProp = "spark.cores.max"),
- OptionAssigner(args.files, YARN, false, sysProp = "spark.yarn.dist.files"),
- OptionAssigner(args.files, YARN, true, clOption = "--files"),
- OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, false, sysProp = "spark.files"),
- OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, true, sysProp = "spark.files"),
- OptionAssigner(args.archives, YARN, false, sysProp = "spark.yarn.dist.archives"),
- OptionAssigner(args.archives, YARN, true, clOption = "--archives"),
- OptionAssigner(args.jars, YARN, true, clOption = "--addJars"),
- OptionAssigner(args.jars, ALL_CLUSTER_MGRS, false, sysProp = "spark.jars")
+ OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES,
+ sysProp = "spark.files")
)
- // For client mode make any added jars immediately visible on the classpath
- if (args.jars != null && !deployOnCluster) {
- for (jar <- args.jars.split(",")) {
- childClasspath += jar
+ // In client mode, launch the application main class directly
+ // In addition, add the main application jar and any added jars (if any) to the classpath
+ if (deployMode == CLIENT) {
+ childMainClass = args.mainClass
+ if (isUserJar(args.primaryResource)) {
+ childClasspath += args.primaryResource
}
+ if (args.jars != null) { childClasspath ++= args.jars.split(",") }
+ if (args.childArgs != null) { childArgs ++= args.childArgs }
}
+
// Map all arguments to command-line options or system properties for our chosen mode
for (opt <- options) {
- if (opt.value != null && deployOnCluster == opt.deployOnCluster &&
+ if (opt.value != null &&
+ (deployMode & opt.deployMode) != 0 &&
(clusterManager & opt.clusterManager) != 0) {
- if (opt.clOption != null) {
- childArgs += (opt.clOption, opt.value)
- }
- if (opt.sysProp != null) {
- sysProps.put(opt.sysProp, opt.value)
- }
+ if (opt.clOption != null) { childArgs += (opt.clOption, opt.value) }
+ if (opt.sysProp != null) { sysProps.put(opt.sysProp, opt.value) }
}
}
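
The cluster-manager and deploy-mode fields of `OptionAssigner` are bit masks, so one rule can apply to several combinations; the condition above is a plain bitwise membership test. A small illustration using the same encoding as the constants at the top of this object:
{{{
// Deploy modes encoded as bits, as in SparkSubmit
val CLIENT = 1
val CLUSTER = 2
val ALL_DEPLOY_MODES = CLIENT | CLUSTER

// A rule declared for ALL_DEPLOY_MODES matches either mode...
assert((CLIENT & ALL_DEPLOY_MODES) != 0)
assert((CLUSTER & ALL_DEPLOY_MODES) != 0)

// ...while a CLUSTER-only rule is skipped in client mode.
assert((CLIENT & CLUSTER) == 0)
}}}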
// Add the application jar automatically so the user doesn't have to call sc.addJar
// For YARN cluster mode, the jar is already distributed on each node as "app.jar"
// For python files, the primary resource is already distributed as a regular file
- if (!isYarnCluster && !isPython) {
- var jars = sysProps.get("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq())
+ val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER
+ if (!isYarnCluster && !args.isPython) {
+ var jars = sysProps.get("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq.empty)
if (isUserJar(args.primaryResource)) {
jars = jars ++ Seq(args.primaryResource)
}
sysProps.put("spark.jars", jars.mkString(","))
}
- // Standalone cluster specific configurations
- if (deployOnCluster && clusterManager == STANDALONE) {
+ // In standalone-cluster mode, use Client as a wrapper around the user class
+ if (clusterManager == STANDALONE && deployMode == CLUSTER) {
+ childMainClass = "org.apache.spark.deploy.Client"
if (args.supervise) {
childArgs += "--supervise"
}
- childMainClass = "org.apache.spark.deploy.Client"
childArgs += "launch"
childArgs += (args.master, args.primaryResource, args.mainClass)
+ if (args.childArgs != null) {
+ childArgs ++= args.childArgs
+ }
}
- // Arguments to be passed to user program
- if (args.childArgs != null) {
- if (!deployOnCluster || clusterManager == STANDALONE) {
- childArgs ++= args.childArgs
- } else if (clusterManager == YARN) {
- for (arg <- args.childArgs) {
- childArgs += ("--arg", arg)
- }
+ // In yarn-cluster mode, use yarn.Client as a wrapper around the user class
+ if (isYarnCluster) {
+ childMainClass = "org.apache.spark.deploy.yarn.Client"
+ if (args.primaryResource != SPARK_INTERNAL) {
+ childArgs += ("--jar", args.primaryResource)
+ }
+ childArgs += ("--class", args.mainClass)
+ if (args.childArgs != null) {
+ args.childArgs.foreach { arg => childArgs += ("--arg", arg) }
+ }
+ }
+
+ // Load any properties specified through --conf and the default properties file
+ for ((k, v) <- args.sparkProperties) {
+ sysProps.getOrElseUpdate(k, v)
+ }
+
+ // Resolve paths in certain spark properties
+ val pathConfigs = Seq(
+ "spark.jars",
+ "spark.files",
+ "spark.yarn.jar",
+ "spark.yarn.dist.files",
+ "spark.yarn.dist.archives")
+ pathConfigs.foreach { config =>
+ // Replace old URIs with resolved URIs, if they exist
+ sysProps.get(config).foreach { oldValue =>
+ sysProps(config) = Utils.resolveURIs(oldValue)
}
}
- // Read from default spark properties, if any
- for ((k, v) <- args.getDefaultSparkProperties) {
- if (!sysProps.contains(k)) sysProps(k) = v
+ // Resolve and format python file paths properly before adding them to the PYTHONPATH.
+ // The resolving part is redundant in the case of --py-files, but necessary if the user
+ // explicitly sets `spark.submit.pyFiles` in his/her default properties file.
+ sysProps.get("spark.submit.pyFiles").foreach { pyFiles =>
+ val resolvedPyFiles = Utils.resolveURIs(pyFiles)
+ val formattedPyFiles = PythonRunner.formatPaths(resolvedPyFiles).mkString(",")
+ sysProps("spark.submit.pyFiles") = formattedPyFiles
}
(childArgs, childClasspath, sysProps, childMainClass)
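
The path-resolution pass above rewrites comma-separated entries in properties such as `spark.jars` so that bare local paths gain an explicit scheme, while entries that already carry one are left alone. A hedged illustration of the intended effect (the exact output depends on the working directory):
{{{
import org.apache.spark.util.Utils

val resolved = Utils.resolveURIs("libs/extra.jar,hdfs://nn:8020/shared/app.jar")
// e.g. "file:/current/working/dir/libs/extra.jar,hdfs://nn:8020/shared/app.jar"
println(resolved)
}}}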
@@ -297,8 +331,24 @@ object SparkSubmit {
System.setProperty(key, value)
}
- val mainClass = Class.forName(childMainClass, true, loader)
+ var mainClass: Class[_] = null
+
+ try {
+ mainClass = Class.forName(childMainClass, true, loader)
+ } catch {
+ case e: ClassNotFoundException =>
+ e.printStackTrace(printStream)
+ if (childMainClass.contains("thriftserver")) {
+ println(s"Failed to load main class $childMainClass.")
+ println("You need to build Spark with -Phive and -Phive-thriftserver.")
+ }
+ System.exit(CLASS_NOT_FOUND_EXIT_STATUS)
+ }
+
val mainMethod = mainClass.getMethod("main", new Array[String](0).getClass)
+ if (!Modifier.isStatic(mainMethod.getModifiers)) {
+ throw new IllegalStateException("The main method in the given main class must be static")
+ }
try {
mainMethod.invoke(null, childArgs.toArray)
} catch {
@@ -328,7 +378,7 @@ object SparkSubmit {
* Return whether the given primary resource represents a user jar.
*/
private def isUserJar(primaryResource: String): Boolean = {
- !isShell(primaryResource) && !isPython(primaryResource)
+ !isShell(primaryResource) && !isPython(primaryResource) && !isInternal(primaryResource)
}
/**
@@ -345,6 +395,10 @@ object SparkSubmit {
primaryResource.endsWith(".py") || primaryResource == PYSPARK_SHELL
}
+ private[spark] def isInternal(primaryResource: String): Boolean = {
+ primaryResource == SPARK_INTERNAL
+ }
+
/**
* Merge a sequence of comma-separated file lists, some of which may be null to indicate
* no files, into a single comma-separated string.
@@ -364,6 +418,6 @@ object SparkSubmit {
private[spark] case class OptionAssigner(
value: String,
clusterManager: Int,
- deployOnCluster: Boolean,
+ deployMode: Int,
clOption: String = null,
sysProp: String = null)
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 57655aa4c32b1..f0e9ee67f6a67 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -17,20 +17,17 @@
package org.apache.spark.deploy
-import java.io.{File, FileInputStream, IOException}
-import java.util.Properties
import java.util.jar.JarFile
-import scala.collection.JavaConversions._
import scala.collection.mutable.{ArrayBuffer, HashMap}
-import org.apache.spark.SparkException
import org.apache.spark.util.Utils
/**
* Parses and encapsulates arguments from the spark-submit script.
+ * The env argument is used for testing.
*/
-private[spark] class SparkSubmitArguments(args: Seq[String]) {
+private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env) {
var master: String = null
var deployMode: String = null
var executorMemory: String = null
@@ -55,19 +52,15 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
var verbose: Boolean = false
var isPython: Boolean = false
var pyFiles: String = null
+ val sparkProperties: HashMap[String, String] = new HashMap[String, String]()
- parseOpts(args.toList)
- loadDefaults()
- checkRequiredArguments()
-
- /** Return default present in the currently defined defaults file. */
- def getDefaultSparkProperties = {
+ /** Default properties present in the currently defined defaults file. */
+ lazy val defaultSparkProperties: HashMap[String, String] = {
val defaultProperties = new HashMap[String, String]()
if (verbose) SparkSubmit.printStream.println(s"Using properties file: $propertiesFile")
Option(propertiesFile).foreach { filename =>
- val file = new File(filename)
- SparkSubmitArguments.getPropertiesFromFile(file).foreach { case (k, v) =>
- if (k.startsWith("spark")) {
+ Utils.getPropertiesFromFile(filename).foreach { case (k, v) =>
+ if (k.startsWith("spark.")) {
defaultProperties(k) = v
if (verbose) SparkSubmit.printStream.println(s"Adding default property: $k=$v")
} else {
@@ -78,37 +71,55 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
defaultProperties
}
- /** Fill in any undefined values based on the current properties file or built-in defaults. */
- private def loadDefaults(): Unit = {
+ // Set parameters from command line arguments
+ parseOpts(args.toList)
+ // Populate `sparkProperties` map from properties file
+ mergeDefaultSparkProperties()
+ // Use `sparkProperties` map along with env vars to fill in any missing parameters
+ loadEnvironmentArguments()
+
+ checkRequiredArguments()
+ /**
+ * Merge values from the default properties file with those specified through --conf.
+ * When this is called, `sparkProperties` is already filled with configs from the latter.
+ */
+ private def mergeDefaultSparkProperties(): Unit = {
// Use common defaults file, if not specified by user
- if (propertiesFile == null) {
- sys.env.get("SPARK_HOME").foreach { sparkHome =>
- val sep = File.separator
- val defaultPath = s"${sparkHome}${sep}conf${sep}spark-defaults.conf"
- val file = new File(defaultPath)
- if (file.exists()) {
- propertiesFile = file.getAbsolutePath
- }
+ propertiesFile = Option(propertiesFile).getOrElse(Utils.getDefaultPropertiesFile(env))
+ // Honor --conf before the defaults file
+ defaultSparkProperties.foreach { case (k, v) =>
+ if (!sparkProperties.contains(k)) {
+ sparkProperties(k) = v
}
}
+ }
- val defaultProperties = getDefaultSparkProperties
- // Use properties file as fallback for values which have a direct analog to
- // arguments in this script.
- master = Option(master).getOrElse(defaultProperties.get("spark.master").orNull)
+ /**
+ * Load arguments from environment variables, Spark properties etc.
+ */
+ private def loadEnvironmentArguments(): Unit = {
+ master = Option(master)
+ .orElse(sparkProperties.get("spark.master"))
+ .orElse(env.get("MASTER"))
+ .orNull
+ driverMemory = Option(driverMemory)
+ .orElse(sparkProperties.get("spark.driver.memory"))
+ .orElse(env.get("SPARK_DRIVER_MEMORY"))
+ .orNull
executorMemory = Option(executorMemory)
- .getOrElse(defaultProperties.get("spark.executor.memory").orNull)
+ .orElse(sparkProperties.get("spark.executor.memory"))
+ .orElse(env.get("SPARK_EXECUTOR_MEMORY"))
+ .orNull
executorCores = Option(executorCores)
- .getOrElse(defaultProperties.get("spark.executor.cores").orNull)
+ .orElse(sparkProperties.get("spark.executor.cores"))
+ .orNull
totalExecutorCores = Option(totalExecutorCores)
- .getOrElse(defaultProperties.get("spark.cores.max").orNull)
- name = Option(name).getOrElse(defaultProperties.get("spark.app.name").orNull)
- jars = Option(jars).getOrElse(defaultProperties.get("spark.jars").orNull)
-
- // This supports env vars in older versions of Spark
- master = Option(master).getOrElse(System.getenv("MASTER"))
- deployMode = Option(deployMode).getOrElse(System.getenv("DEPLOY_MODE"))
+ .orElse(sparkProperties.get("spark.cores.max"))
+ .orNull
+ name = Option(name).orElse(sparkProperties.get("spark.app.name")).orNull
+ jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull
+ deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull
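
The `Option(...).orElse(...)` chains above encode a fixed precedence: an explicit command-line value wins, then the merged Spark properties, then the legacy environment variable. A minimal sketch of that pattern in isolation (names are illustrative):
{{{
// Precedence: explicit argument > Spark property > environment variable > null
def resolve(cliValue: Option[String],
            propValue: Option[String],
            envValue: Option[String]): String = {
  cliValue.orElse(propValue).orElse(envValue).orNull
}

assert(resolve(Some("yarn-client"), Some("local[2]"), None) == "yarn-client")
assert(resolve(None, Some("local[2]"), Some("spark://host:7077")) == "local[2]")
assert(resolve(None, None, None) == null)
}}}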
// Try to set main class from JAR if no --class argument is given
if (mainClass == null && !isPython && primaryResource != null) {
@@ -134,7 +145,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
}
/** Ensure that required fields exist. Call this only once all defaults are loaded. */
- private def checkRequiredArguments() = {
+ private def checkRequiredArguments(): Unit = {
if (args.length == 0) {
printUsageAndExit(-1)
}
@@ -161,7 +172,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
}
if (master.startsWith("yarn")) {
- val hasHadoopEnv = sys.env.contains("HADOOP_CONF_DIR") || sys.env.contains("YARN_CONF_DIR")
+ val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR")
if (!hasHadoopEnv && !Utils.isTesting) {
throw new Exception(s"When running with master '$master' " +
"either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.")
@@ -169,7 +180,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
}
}
- override def toString = {
+ override def toString = {
s"""Parsed arguments:
| master $master
| deployMode $deployMode
@@ -195,17 +206,23 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
| jars $jars
| verbose $verbose
|
- |Default properties from $propertiesFile:
- |${getDefaultSparkProperties.mkString(" ", "\n ", "\n")}
+ |Spark properties used, including those specified through
+ | --conf and those from the properties file $propertiesFile:
+ |${sparkProperties.mkString(" ", "\n ", "\n")}
""".stripMargin
}
/** Fill in values by parsing user options. */
private def parseOpts(opts: Seq[String]): Unit = {
+ val EQ_SEPARATED_OPT = """(--[^=]+)=(.+)""".r
+
// Delineates parsing of Spark options from parsing of user options.
- var inSparkOpts = true
parse(opts)
+ /**
+ * NOTE: If you add or remove spark-submit options,
+ * modify NOT ONLY this file but also utils.sh
+ */
def parse(opts: Seq[String]): Unit = opts match {
case ("--name") :: value :: tail =>
name = value
@@ -290,6 +307,13 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
jars = Utils.resolveURIs(value)
parse(tail)
+ case ("--conf" | "-c") :: value :: tail =>
+ value.split("=", 2).toSeq match {
+ case Seq(k, v) => sparkProperties(k) = v
+ case _ => SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
+ }
+ parse(tail)
+
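Because the value is split with `split("=", 2)`, only the first `=` separates the key from the value, so a property value that itself contains `=` (for example a Java options string) survives unchanged. An illustrative check:
{{{
val value = "spark.driver.extraJavaOptions=-Dfoo=bar"
value.split("=", 2).toSeq match {
  case Seq(k, v) =>
    assert(k == "spark.driver.extraJavaOptions")
    assert(v == "-Dfoo=bar")
  case _ =>
    sys.error(s"Spark config without '=': $value")
}
}}}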
case ("--help" | "-h") :: tail =>
printUsageAndExit(0)
@@ -297,39 +321,27 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
verbose = true
parse(tail)
+ case EQ_SEPARATED_OPT(opt, value) :: tail =>
+ parse(opt :: value :: tail)
+
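The `EQ_SEPARATED_OPT` extractor replaces the old ad-hoc `--foo=bar` handling: the regex splits the token once and the pieces are fed back to `parse` as if they had been passed as two separate arguments. For example:
{{{
val EQ_SEPARATED_OPT = """(--[^=]+)=(.+)""".r

"--name=MyApp" match {
  case EQ_SEPARATED_OPT(opt, value) =>
    assert(opt == "--name")
    assert(value == "MyApp")
}
}}}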
+ case value :: tail if value.startsWith("-") =>
+ SparkSubmit.printErrorAndExit(s"Unrecognized option '$value'.")
+
case value :: tail =>
- if (inSparkOpts) {
- value match {
- // convert --foo=bar to --foo bar
- case v if v.startsWith("--") && v.contains("=") && v.split("=").size == 2 =>
- val parts = v.split("=")
- parse(Seq(parts(0), parts(1)) ++ tail)
- case v if v.startsWith("-") =>
- val errMessage = s"Unrecognized option '$value'."
- SparkSubmit.printErrorAndExit(errMessage)
- case v =>
- primaryResource =
- if (!SparkSubmit.isShell(v)) {
- Utils.resolveURI(v).toString
- } else {
- v
- }
- inSparkOpts = false
- isPython = SparkSubmit.isPython(v)
- parse(tail)
+ primaryResource =
+ if (!SparkSubmit.isShell(value) && !SparkSubmit.isInternal(value)) {
+ Utils.resolveURI(value).toString
+ } else {
+ value
}
- } else {
- if (!value.isEmpty) {
- childArgs += value
- }
- parse(tail)
- }
+ isPython = SparkSubmit.isPython(value)
+ childArgs ++= tail
case Nil =>
}
}
- private def printUsageAndExit(exitCode: Int, unknownParam: Any = null) {
+ private def printUsageAndExit(exitCode: Int, unknownParam: Any = null): Unit = {
val outStream = SparkSubmit.printStream
if (unknownParam != null) {
outStream.println("Unknown/unsupported param " + unknownParam)
@@ -349,6 +361,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
| on the PYTHONPATH for Python apps.
| --files FILES Comma-separated list of files to be placed in the working
| directory of each executor.
+ |
+ | --conf PROP=VALUE Arbitrary Spark configuration property.
| --properties-file FILE Path to a file from which to load extra properties. If not
| specified, this will look for conf/spark-defaults.conf.
|
@@ -381,23 +395,3 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
SparkSubmit.exitFn()
}
}
-
-object SparkSubmitArguments {
- /** Load properties present in the given file. */
- def getPropertiesFromFile(file: File): Seq[(String, String)] = {
- require(file.exists(), s"Properties file $file does not exist")
- require(file.isFile(), s"Properties file $file is not a normal file")
- val inputStream = new FileInputStream(file)
- try {
- val properties = new Properties()
- properties.load(inputStream)
- properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim))
- } catch {
- case e: IOException =>
- val message = s"Failed when loading Spark properties file $file"
- throw new SparkException(message, e)
- } finally {
- inputStream.close()
- }
- }
-}
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
new file mode 100644
index 0000000000000..aa3743ca7df63
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy
+
+import java.io.File
+
+import scala.collection.JavaConversions._
+
+import org.apache.spark.util.{RedirectThread, Utils}
+
+/**
+ * Launch an application through Spark submit in client mode with the appropriate classpath,
+ * library paths, java options and memory. These properties of the JVM must be set before the
+ * driver JVM is launched. The sole purpose of this class is to avoid handling the complexity
+ * of parsing the properties file for such relevant configs in Bash.
+ *
+ * Usage: org.apache.spark.deploy.SparkSubmitDriverBootstrapper
+ */
+private[spark] object SparkSubmitDriverBootstrapper {
+
+ // Note: This class depends on the behavior of `bin/spark-class` and `bin/spark-submit`.
+ // Any changes made there must be reflected in this file.
+
+ def main(args: Array[String]): Unit = {
+
+ // This should be called only from `bin/spark-class`
+ if (!sys.env.contains("SPARK_CLASS")) {
+ System.err.println("SparkSubmitDriverBootstrapper must be called from `bin/spark-class`!")
+ System.exit(1)
+ }
+
+ val submitArgs = args
+ val runner = sys.env("RUNNER")
+ val classpath = sys.env("CLASSPATH")
+ val javaOpts = sys.env("JAVA_OPTS")
+ val defaultDriverMemory = sys.env("OUR_JAVA_MEM")
+
+ // Spark submit specific environment variables
+ val deployMode = sys.env("SPARK_SUBMIT_DEPLOY_MODE")
+ val propertiesFile = sys.env("SPARK_SUBMIT_PROPERTIES_FILE")
+ val bootstrapDriver = sys.env("SPARK_SUBMIT_BOOTSTRAP_DRIVER")
+ val submitDriverMemory = sys.env.get("SPARK_SUBMIT_DRIVER_MEMORY")
+ val submitLibraryPath = sys.env.get("SPARK_SUBMIT_LIBRARY_PATH")
+ val submitClasspath = sys.env.get("SPARK_SUBMIT_CLASSPATH")
+ val submitJavaOpts = sys.env.get("SPARK_SUBMIT_OPTS")
+
+ assume(runner != null, "RUNNER must be set")
+ assume(classpath != null, "CLASSPATH must be set")
+ assume(javaOpts != null, "JAVA_OPTS must be set")
+ assume(defaultDriverMemory != null, "OUR_JAVA_MEM must be set")
+ assume(deployMode == "client", "SPARK_SUBMIT_DEPLOY_MODE must be \"client\"!")
+ assume(propertiesFile != null, "SPARK_SUBMIT_PROPERTIES_FILE must be set")
+ assume(bootstrapDriver != null, "SPARK_SUBMIT_BOOTSTRAP_DRIVER must be set")
+
+ // Parse the properties file for the equivalent spark.driver.* configs
+ val properties = Utils.getPropertiesFromFile(propertiesFile)
+ val confDriverMemory = properties.get("spark.driver.memory")
+ val confLibraryPath = properties.get("spark.driver.extraLibraryPath")
+ val confClasspath = properties.get("spark.driver.extraClassPath")
+ val confJavaOpts = properties.get("spark.driver.extraJavaOptions")
+
+ // Favor Spark submit arguments over the equivalent configs in the properties file.
+ // Note that we do not actually use the Spark submit values for library path, classpath,
+ // and Java opts here, because we have already captured them in Bash.
+
+ val newDriverMemory = submitDriverMemory
+ .orElse(confDriverMemory)
+ .getOrElse(defaultDriverMemory)
+
+ val newClasspath =
+ if (submitClasspath.isDefined) {
+ classpath
+ } else {
+ classpath + confClasspath.map(sys.props("path.separator") + _).getOrElse("")
+ }
+
+ val newJavaOpts =
+ if (submitJavaOpts.isDefined) {
+ // SPARK_SUBMIT_OPTS is already captured in JAVA_OPTS
+ javaOpts
+ } else {
+ javaOpts + confJavaOpts.map(" " + _).getOrElse("")
+ }
+
+ val filteredJavaOpts = Utils.splitCommandString(newJavaOpts)
+ .filterNot(_.startsWith("-Xms"))
+ .filterNot(_.startsWith("-Xmx"))
+
+ // Build up command
+ val command: Seq[String] =
+ Seq(runner) ++
+ Seq("-cp", newClasspath) ++
+ filteredJavaOpts ++
+ Seq(s"-Xms$newDriverMemory", s"-Xmx$newDriverMemory") ++
+ Seq("org.apache.spark.deploy.SparkSubmit") ++
+ submitArgs
+
+ // Print the launch command. This follows closely the format used in `bin/spark-class`.
+ if (sys.env.contains("SPARK_PRINT_LAUNCH_COMMAND")) {
+ System.err.print("Spark Command: ")
+ System.err.println(command.mkString(" "))
+ System.err.println("========================================\n")
+ }
+
+ // Start the driver JVM
+ val filteredCommand = command.filter(_.nonEmpty)
+ val builder = new ProcessBuilder(filteredCommand)
+ val env = builder.environment()
+
+ if (submitLibraryPath.isEmpty && confLibraryPath.nonEmpty) {
+ val libraryPaths = confLibraryPath ++ sys.env.get(Utils.libraryPathEnvName)
+ env.put(Utils.libraryPathEnvName, libraryPaths.mkString(sys.props("path.separator")))
+ }
+
+ val process = builder.start()
+
+ // If we kill an app while it's running, its sub-process should be killed too.
+ Runtime.getRuntime().addShutdownHook(new Thread() {
+ override def run() = {
+ if (process != null) {
+ process.destroy()
+ sys.exit(process.waitFor())
+ }
+ }
+ })
+
+ // Redirect stdout and stderr from the child JVM
+ val stdoutThread = new RedirectThread(process.getInputStream, System.out, "redirect stdout")
+ val stderrThread = new RedirectThread(process.getErrorStream, System.err, "redirect stderr")
+ stdoutThread.start()
+ stderrThread.start()
+
+ // Redirect stdin to child JVM only if we're not running Windows. This is because the
+ // subprocess there already reads directly from our stdin, so we should avoid spawning a
+ // thread that contends with the subprocess in reading from System.in.
+ val isWindows = Utils.isWindows
+ val isSubprocess = sys.env.contains("IS_SUBPROCESS")
+ if (!isWindows) {
+ val stdinThread = new RedirectThread(System.in, process.getOutputStream, "redirect stdin")
+ stdinThread.start()
+ // Spark submit (JVM) may run as a subprocess, and so this JVM should terminate on
+ // broken pipe, signaling that the parent process has exited. This is the case if the
+ // application is launched directly from python, as in the PySpark shell. In Windows,
+ // the termination logic is handled in java_gateway.py
+ if (isSubprocess) {
+ stdinThread.join()
+ process.destroy()
+ }
+ }
+ val returnCode = process.waitFor()
+ sys.exit(returnCode)
+ }
+
+}
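
`RedirectThread` (from `org.apache.spark.util`) is used above to pump the child JVM's stdout and stderr into this process's streams. Its actual implementation lives elsewhere in the tree; conceptually it is just a daemon copy loop, roughly like the sketch below (an illustrative stand-in, not the real class):
{{{
import java.io.{InputStream, OutputStream}

// Hypothetical stand-in for org.apache.spark.util.RedirectThread:
// copy bytes from `in` to `out` until the source is exhausted.
class CopyStreamThread(in: InputStream, out: OutputStream, name: String)
  extends Thread(name) {
  setDaemon(true)
  override def run(): Unit = {
    val buf = new Array[Byte](1024)
    var n = in.read(buf)
    while (n != -1) {
      out.write(buf, 0, n)
      out.flush()
      n = in.read(buf)
    }
  }
}
}}}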
diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
index d38e9e79204c2..98a93d1fcb2a3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
@@ -30,7 +30,7 @@ import org.apache.spark.{Logging, SparkConf, SparkException}
import org.apache.spark.deploy.{ApplicationDescription, ExecutorState}
import org.apache.spark.deploy.DeployMessages._
import org.apache.spark.deploy.master.Master
-import org.apache.spark.util.{Utils, AkkaUtils}
+import org.apache.spark.util.{ActorLogReceive, Utils, AkkaUtils}
/**
* Interface allowing applications to speak with a Spark deploy cluster. Takes a master URL,
@@ -56,7 +56,7 @@ private[spark] class AppClient(
var registered = false
var activeMasterUrl: String = null
- class ClientActor extends Actor with Logging {
+ class ClientActor extends Actor with ActorLogReceive with Logging {
var master: ActorSelection = null
var alreadyDisconnected = false // To avoid calling listener.disconnected() multiple times
var alreadyDead = false // To avoid calling listener.dead() multiple times
@@ -119,7 +119,7 @@ private[spark] class AppClient(
.contains(remoteUrl.hostPort)
}
- override def receive = {
+ override def receiveWithLogging = {
case RegisteredApplication(appId_, masterUrl) =>
appId = appId_
registered = true
@@ -154,7 +154,7 @@ private[spark] class AppClient(
logWarning(s"Connection to $address failed; waiting for master to reconnect...")
markDisconnected()
- case AssociationErrorEvent(cause, _, address, _) if isPossibleMaster(address) =>
+ case AssociationErrorEvent(cause, _, address, _, _) if isPossibleMaster(address) =>
logWarning(s"Could not connect to $address: $cause")
case StopAppClient =>
diff --git a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
index e15a87bd38fda..88a0862b96afe 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
@@ -46,11 +46,10 @@ private[spark] object TestClient {
def main(args: Array[String]) {
val url = args(0)
val conf = new SparkConf
- val (actorSystem, port) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0,
+ val (actorSystem, _) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0,
conf = conf, securityManager = new SecurityManager(conf))
- val desc = new ApplicationDescription(
- "TestClient", Some(1), 512, Command("spark.deploy.client.TestExecutor", Seq(), Map(), Seq(),
- Seq()), Some("dummy-spark-home"), "ignored")
+ val desc = new ApplicationDescription("TestClient", Some(1), 512,
+ Command("spark.deploy.client.TestExecutor", Seq(), Map(), Seq(), Seq(), Seq()), "ignored")
val listener = new TestListener
val client = new AppClient(actorSystem, Array(url), desc, listener, new SparkConf)
client.start()
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala
index a0e8bd403a41d..fbe39b27649f6 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala
@@ -34,15 +34,15 @@ private[spark] abstract class ApplicationHistoryProvider {
*
* @return List of all known applications.
*/
- def getListing(): Seq[ApplicationHistoryInfo]
+ def getListing(): Iterable[ApplicationHistoryInfo]
/**
* Returns the Spark UI for a specific application.
*
* @param appId The application ID.
- * @return The application's UI, or null if application is not found.
+ * @return The application's UI, or None if application is not found.
*/
- def getAppUI(appId: String): SparkUI
+ def getAppUI(appId: String): Option[SparkUI]
/**
* Called when the server is shutting down.
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index a8c9ac072449f..2d1609b973607 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -24,6 +24,7 @@ import scala.collection.mutable
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.spark.{Logging, SecurityManager, SparkConf}
+import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.scheduler._
import org.apache.spark.ui.SparkUI
import org.apache.spark.util.Utils
@@ -31,22 +32,32 @@ import org.apache.spark.util.Utils
private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHistoryProvider
with Logging {
+ private val NOT_STARTED = ""
+
// Interval between each check for event log updates
private val UPDATE_INTERVAL_MS = conf.getInt("spark.history.fs.updateInterval",
conf.getInt("spark.history.updateInterval", 10)) * 1000
private val logDir = conf.get("spark.history.fs.logDirectory", null)
- if (logDir == null) {
- throw new IllegalArgumentException("Logging directory must be specified.")
- }
+ private val resolvedLogDir = Option(logDir)
+ .map { d => Utils.resolveURI(d) }
+ .getOrElse { throw new IllegalArgumentException("Logging directory must be specified.") }
- private val fs = Utils.getHadoopFileSystem(logDir)
+ private val fs = Utils.getHadoopFileSystem(resolvedLogDir,
+ SparkHadoopUtil.get.newConfiguration(conf))
// A timestamp of when the disk was last accessed to check for log updates
private var lastLogCheckTimeMs = -1L
- // List of applications, in order from newest to oldest.
- @volatile private var appList: Seq[ApplicationHistoryInfo] = Nil
+ // The modification time of the newest log detected during the last scan. This is used
+ // to ignore logs that are older during subsequent scans, to avoid processing data that
+ // is already known.
+ private var lastModifiedTime = -1L
+
+ // Mapping of application IDs to their metadata, in descending end time order. Apps are inserted
+ // into the map in order, so the LinkedHashMap maintains the correct ordering.
+ @volatile private var applications: mutable.LinkedHashMap[String, FsApplicationHistoryInfo]
+ = new mutable.LinkedHashMap()
/**
* A background thread that periodically checks for event log updates on disk.
@@ -76,14 +87,14 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
private def initialize() {
// Validate the log directory.
- val path = new Path(logDir)
+ val path = new Path(resolvedLogDir)
if (!fs.exists(path)) {
throw new IllegalArgumentException(
- "Logging directory specified does not exist: %s".format(logDir))
+ "Logging directory specified does not exist: %s".format(resolvedLogDir))
}
if (!fs.getFileStatus(path).isDir) {
throw new IllegalArgumentException(
- "Logging directory specified is not a directory: %s".format(logDir))
+ "Logging directory specified is not a directory: %s".format(resolvedLogDir))
}
checkForLogs()
@@ -91,19 +102,40 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
logCheckingThread.start()
}
- override def getListing() = appList
+ override def getListing() = applications.values
- override def getAppUI(appId: String): SparkUI = {
+ override def getAppUI(appId: String): Option[SparkUI] = {
try {
- val appLogDir = fs.getFileStatus(new Path(logDir, appId))
- loadAppInfo(appLogDir, true)._2
+ applications.get(appId).map { info =>
+ val (replayBus, appListener) = createReplayBus(fs.getFileStatus(
+ new Path(logDir, info.logDir)))
+ val ui = {
+ val conf = this.conf.clone()
+ val appSecManager = new SecurityManager(conf)
+ SparkUI.createHistoryUI(conf, replayBus, appSecManager, appId,
+ s"${HistoryServer.UI_PATH_PREFIX}/$appId")
+ // Do not call ui.bind() to avoid creating a new server for each application
+ }
+
+ replayBus.replay()
+
+ ui.setAppName(s"${appListener.appName.getOrElse(NOT_STARTED)} ($appId)")
+
+ val uiAclsEnabled = conf.getBoolean("spark.history.ui.acls.enable", false)
+ ui.getSecurityManager.setAcls(uiAclsEnabled)
+ // make sure to set admin acls before view acls so they are properly picked up
+ ui.getSecurityManager.setAdminAcls(appListener.adminAcls.getOrElse(""))
+ ui.getSecurityManager.setViewAcls(appListener.sparkUser.getOrElse(NOT_STARTED),
+ appListener.viewAcls.getOrElse(""))
+ ui
+ }
} catch {
- case e: FileNotFoundException => null
+ case e: FileNotFoundException => None
}
}
override def getConfig(): Map[String, String] =
- Map(("Event Log Location" -> logDir))
+ Map("Event Log Location" -> resolvedLogDir.toString)
/**
* Builds the application list based on the current contents of the log directory.
@@ -114,82 +146,81 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
lastLogCheckTimeMs = getMonotonicTimeMs()
logDebug("Checking for logs. Time is now %d.".format(lastLogCheckTimeMs))
try {
- val logStatus = fs.listStatus(new Path(logDir))
+ val logStatus = fs.listStatus(new Path(resolvedLogDir))
val logDirs = if (logStatus != null) logStatus.filter(_.isDir).toSeq else Seq[FileStatus]()
- val logInfos = logDirs.filter {
- dir => fs.isFile(new Path(dir.getPath(), EventLoggingListener.APPLICATION_COMPLETE))
- }
-
- val currentApps = Map[String, ApplicationHistoryInfo](
- appList.map(app => (app.id -> app)):_*)
- // For any application that either (i) is not listed or (ii) has changed since the last time
- // the listing was created (defined by the log dir's modification time), load the app's info.
- // Otherwise just reuse what's already in memory.
- val newApps = new mutable.ArrayBuffer[ApplicationHistoryInfo](logInfos.size)
- for (dir <- logInfos) {
- val curr = currentApps.getOrElse(dir.getPath().getName(), null)
- if (curr == null || curr.lastUpdated < getModificationTime(dir)) {
+ // Load all new logs from the log directory. Only directories that have a modification time
+ // later than the last known log directory will be loaded.
+ var newLastModifiedTime = lastModifiedTime
+ val logInfos = logDirs
+ .filter { dir =>
+ if (fs.isFile(new Path(dir.getPath(), EventLoggingListener.APPLICATION_COMPLETE))) {
+ val modTime = getModificationTime(dir)
+ newLastModifiedTime = math.max(newLastModifiedTime, modTime)
+ modTime > lastModifiedTime
+ } else {
+ false
+ }
+ }
+ .flatMap { dir =>
try {
- newApps += loadAppInfo(dir, false)._1
+ val (replayBus, appListener) = createReplayBus(dir)
+ replayBus.replay()
+ Some(new FsApplicationHistoryInfo(
+ dir.getPath().getName(),
+ appListener.appId.getOrElse(dir.getPath().getName()),
+ appListener.appName.getOrElse(NOT_STARTED),
+ appListener.startTime.getOrElse(-1L),
+ appListener.endTime.getOrElse(-1L),
+ getModificationTime(dir),
+ appListener.sparkUser.getOrElse(NOT_STARTED)))
} catch {
- case e: Exception => logError(s"Failed to load app info from directory $dir.")
+ case e: Exception =>
+ logInfo(s"Failed to load application log data from $dir.", e)
+ None
}
- } else {
- newApps += curr
}
- }
+ .sortBy { info => -info.endTime }
+
+ lastModifiedTime = newLastModifiedTime
+
+ // When there are new logs, merge the new list with the existing one, maintaining
+ // the expected ordering (descending end time). Maintaining the order is important
+ // to avoid having to sort the list every time there is a request for the log list.
+ if (!logInfos.isEmpty) {
+ val newApps = new mutable.LinkedHashMap[String, FsApplicationHistoryInfo]()
+ def addIfAbsent(info: FsApplicationHistoryInfo) = {
+ if (!newApps.contains(info.id)) {
+ newApps += (info.id -> info)
+ }
+ }
+
+ val newIterator = logInfos.iterator.buffered
+ val oldIterator = applications.values.iterator.buffered
+ while (newIterator.hasNext && oldIterator.hasNext) {
+ if (newIterator.head.endTime > oldIterator.head.endTime) {
+ addIfAbsent(newIterator.next)
+ } else {
+ addIfAbsent(oldIterator.next)
+ }
+ }
+ newIterator.foreach(addIfAbsent)
+ oldIterator.foreach(addIfAbsent)
- appList = newApps.sortBy { info => -info.endTime }
+ applications = newApps
+ }
} catch {
case t: Throwable => logError("Exception in checking for event log updates", t)
}
}
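
The buffered-iterator merge above is the classic merge step of merge sort: the freshly scanned logs and the previously known applications are each already sorted by descending end time, so a single linear pass produces the combined listing without re-sorting. A small standalone sketch of the same idea:
{{{
import scala.collection.mutable

// Merge two lists that are each sorted by descending end time, dropping duplicate IDs.
case class AppInfo(id: String, endTime: Long)

def merge(newApps: Seq[AppInfo], oldApps: Seq[AppInfo]): Seq[AppInfo] = {
  val merged = new mutable.LinkedHashMap[String, AppInfo]()
  def addIfAbsent(info: AppInfo): Unit = {
    if (!merged.contains(info.id)) merged += (info.id -> info)
  }
  val newIt = newApps.iterator.buffered
  val oldIt = oldApps.iterator.buffered
  while (newIt.hasNext && oldIt.hasNext) {
    if (newIt.head.endTime > oldIt.head.endTime) addIfAbsent(newIt.next())
    else addIfAbsent(oldIt.next())
  }
  newIt.foreach(addIfAbsent)
  oldIt.foreach(addIfAbsent)
  merged.values.toSeq
}

// merge(Seq(AppInfo("app-3", 30L)), Seq(AppInfo("app-2", 20L), AppInfo("app-1", 10L)))
// => Seq(AppInfo("app-3", 30), AppInfo("app-2", 20), AppInfo("app-1", 10))
}}}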
- /**
- * Parse the application's logs to find out the information we need to build the
- * listing page.
- *
- * When creating the listing of available apps, there is no need to load the whole UI for the
- * application. The UI is requested by the HistoryServer (by calling getAppInfo()) when the user
- * clicks on a specific application.
- *
- * @param logDir Directory with application's log files.
- * @param renderUI Whether to create the SparkUI for the application.
- * @return A 2-tuple `(app info, ui)`. `ui` will be null if `renderUI` is false.
- */
- private def loadAppInfo(logDir: FileStatus, renderUI: Boolean) = {
- val elogInfo = EventLoggingListener.parseLoggingInfo(logDir.getPath(), fs)
- val path = logDir.getPath
- val appId = path.getName
+ private def createReplayBus(logDir: FileStatus): (ReplayListenerBus, ApplicationEventListener) = {
+ val path = logDir.getPath()
+ val elogInfo = EventLoggingListener.parseLoggingInfo(path, fs)
val replayBus = new ReplayListenerBus(elogInfo.logPaths, fs, elogInfo.compressionCodec)
val appListener = new ApplicationEventListener
replayBus.addListener(appListener)
-
- val ui: SparkUI = if (renderUI) {
- val conf = this.conf.clone()
- val appSecManager = new SecurityManager(conf)
- new SparkUI(conf, appSecManager, replayBus, appId, "/history/" + appId)
- // Do not call ui.bind() to avoid creating a new server for each application
- } else {
- null
- }
-
- replayBus.replay()
- val appInfo = ApplicationHistoryInfo(
- appId,
- appListener.appName,
- appListener.startTime,
- appListener.endTime,
- getModificationTime(logDir),
- appListener.sparkUser)
-
- if (ui != null) {
- val uiAclsEnabled = conf.getBoolean("spark.history.ui.acls.enable", false)
- ui.getSecurityManager.setUIAcls(uiAclsEnabled)
- ui.getSecurityManager.setViewAcls(appListener.sparkUser, appListener.viewAcls)
- }
- (appInfo, ui)
+ (replayBus, appListener)
}
/** Return when this directory was last modified. */
@@ -212,3 +243,13 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
private def getMonotonicTimeMs() = System.nanoTime() / (1000 * 1000)
}
+
+private class FsApplicationHistoryInfo(
+ val logDir: String,
+ id: String,
+ name: String,
+ startTime: Long,
+ endTime: Long,
+ lastUpdated: Long,
+ sparkUser: String)
+ extends ApplicationHistoryInfo(id, name, startTime, endTime, lastUpdated, sparkUser)
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
index a958c837c2ff6..0e249e51a77d8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
@@ -45,7 +45,7 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") {
- { providerConfig.map(e => <li><strong>{e._1}:</strong> {e._2}</li>) }
+ {providerConfig.map { case (k, v) => <li><strong>{k}:</strong> {v}</li> }}
{
if (allApps.size > 0) {
@@ -67,6 +67,7 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") {
}
private val appHeader = Seq(
+ "App ID",
"App Name",
"Started",
"Completed",
@@ -75,18 +76,19 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") {
"Last Updated")
private def appRow(info: ApplicationHistoryInfo): Seq[Node] = {
- val uiAddress = "/history/" + info.id
+ val uiAddress = HistoryServer.UI_PATH_PREFIX + s"/${info.id}"
val startTime = UIUtils.formatDate(info.startTime)
val endTime = UIUtils.formatDate(info.endTime)
val duration = UIUtils.formatDuration(info.endTime - info.startTime)
val lastUpdated = UIUtils.formatDate(info.lastUpdated)
- <td><a href={uiAddress}>{info.name}</a></td>
- <td>{startTime}</td>
- <td>{endTime}</td>
- <td>{duration}</td>
+ <td><a href={uiAddress}>{info.id}</a></td>
+ <td>{info.name}</td>
+ <td>{startTime}</td>
+ <td>{endTime}</td>
+ <td>{duration}</td>
<td>{info.sparkUser}</td>
- <td>{lastUpdated}</td>
+ <td>{lastUpdated}</td>
}
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index 56b38ddfc9313..ce00c0ffd21e0 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -25,9 +25,9 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
import org.apache.spark.{Logging, SecurityManager, SparkConf}
import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.ui.{WebUI, SparkUI, UIUtils}
+import org.apache.spark.ui.{SparkUI, UIUtils, WebUI}
import org.apache.spark.ui.JettyUtils._
-import org.apache.spark.util.{SignalLogger, Utils}
+import org.apache.spark.util.SignalLogger
/**
* A web server that renders SparkUIs of completed applications.
@@ -52,10 +52,7 @@ class HistoryServer(
private val appLoader = new CacheLoader[String, SparkUI] {
override def load(key: String): SparkUI = {
- val ui = provider.getAppUI(key)
- if (ui == null) {
- throw new NoSuchElementException()
- }
+ val ui = provider.getAppUI(key).getOrElse(throw new NoSuchElementException())
attachSparkUI(ui)
ui
}
@@ -114,7 +111,7 @@ class HistoryServer(
attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static"))
val contextHandler = new ServletContextHandler
- contextHandler.setContextPath("/history")
+ contextHandler.setContextPath(HistoryServer.UI_PATH_PREFIX)
contextHandler.addServlet(new ServletHolder(loaderServlet), "/*")
attachHandler(contextHandler)
}
@@ -172,10 +169,12 @@ class HistoryServer(
object HistoryServer extends Logging {
private val conf = new SparkConf
+ val UI_PATH_PREFIX = "/history"
+
def main(argStrings: Array[String]) {
SignalLogger.register(log)
initSecurity()
- val args = new HistoryServerArguments(conf, argStrings)
+ new HistoryServerArguments(conf, argStrings)
val securityManager = new SecurityManager(conf)
val providerName = conf.getOption("spark.history.provider")
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
index be9361b754fc3..5bce32a04d16d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
@@ -25,6 +25,7 @@ import org.apache.spark.util.Utils
*/
private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]) {
private var logDir: String = null
+ private var propertiesFile: String = null
parse(args.toList)
@@ -32,25 +33,35 @@ private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]
args match {
case ("--dir" | "-d") :: value :: tail =>
logDir = value
+ conf.set("spark.history.fs.logDirectory", value)
+ System.setProperty("spark.history.fs.logDirectory", value)
parse(tail)
case ("--help" | "-h") :: tail =>
printUsageAndExit(0)
+ case ("--properties-file") :: value :: tail =>
+ propertiesFile = value
+ parse(tail)
+
case Nil =>
case _ =>
printUsageAndExit(1)
}
- if (logDir != null) {
- conf.set("spark.history.fs.logDirectory", logDir)
- }
}
+ // This mutates the SparkConf, so all accesses to it must be made after this line
+ Utils.loadDefaultSparkProperties(conf, propertiesFile)
+
private def printUsageAndExit(exitCode: Int) {
System.err.println(
"""
- |Usage: HistoryServer
+ |Usage: HistoryServer [options]
+ |
+ |Options:
+ | --properties-file FILE Path to a custom Spark properties file.
+ | Default is conf/spark-defaults.conf.
|
|Configuration options can be set by setting the corresponding JVM system property.
|History Server options are always available; additional options depend on the provider.
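The new `--properties-file` handling can also be exercised programmatically. A hedged sketch (object name and paths are placeholders, not part of this patch): `--dir` is mirrored into `spark.history.fs.logDirectory` during parsing, and the optional properties file is then loaded into the same SparkConf, so anything read from the conf afterwards sees both sources.
{{{
package org.apache.spark.deploy.history

import org.apache.spark.SparkConf

// Hypothetical driver for the argument parser; the class is private[spark], so this
// sketch has to live under the same package (for example as a test).
object HistoryServerArgumentsSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    new HistoryServerArguments(conf, Array(
      "--dir", "hdfs://namenode:8020/spark-events",
      "--properties-file", "/etc/spark/history-defaults.conf"))

    // After construction the conf reflects both the flags and the properties file.
    println(conf.get("spark.history.fs.logDirectory"))
  }
}
}}}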
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala
index 72d0589689e71..ad7d81747c377 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala
@@ -24,7 +24,9 @@ import scala.collection.mutable.ArrayBuffer
import akka.actor.ActorRef
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.deploy.ApplicationDescription
+import org.apache.spark.util.Utils
private[spark] class ApplicationInfo(
val startTime: Long,
@@ -46,6 +48,11 @@ private[spark] class ApplicationInfo(
init()
+ private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException {
+ in.defaultReadObject()
+ init()
+ }
+
private def init() {
state = ApplicationState.WAITING
executors = new mutable.HashMap[Int, ExecutorInfo]
@@ -91,11 +98,13 @@ private[spark] class ApplicationInfo(
def retryCount = _retryCount
- def incrementRetryCount = {
+ def incrementRetryCount() = {
_retryCount += 1
_retryCount
}
+ def resetRetryCount() = _retryCount = 0
+
def markFinished(endState: ApplicationState.Value) {
state = endState
endTime = System.currentTimeMillis()
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala
index c87b66f047dc8..38db02cd2421b 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala
@@ -22,8 +22,8 @@ import com.codahale.metrics.{Gauge, MetricRegistry}
import org.apache.spark.metrics.source.Source
class ApplicationSource(val application: ApplicationInfo) extends Source {
- val metricRegistry = new MetricRegistry()
- val sourceName = "%s.%s.%s".format("application", application.desc.name,
+ override val metricRegistry = new MetricRegistry()
+ override val sourceName = "%s.%s.%s".format("application", application.desc.name,
System.currentTimeMillis())
metricRegistry.register(MetricRegistry.name("status"), new Gauge[String] {
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala
index 33377931d6993..9d3d7938c6ccb 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala
@@ -19,7 +19,9 @@ package org.apache.spark.deploy.master
import java.util.Date
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.deploy.DriverDescription
+import org.apache.spark.util.Utils
private[spark] class DriverInfo(
val startTime: Long,
@@ -33,4 +35,17 @@ private[spark] class DriverInfo(
@transient var exception: Option[Exception] = None
/* Most recent worker assigned to this driver */
@transient var worker: Option[WorkerInfo] = None
+
+ init()
+
+ private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException {
+ in.defaultReadObject()
+ init()
+ }
+
+ private def init(): Unit = {
+ state = DriverState.SUBMITTED
+ worker = None
+ exception = None
+ }
}
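ApplicationInfo and DriverInfo now follow the same pattern: transient state is rebuilt in `init()`, and a custom `readObject` re-runs it after default deserialization, with `Utils.tryOrIOException` turning failures into the `IOException` that Java serialization expects. A minimal, self-contained sketch of the same pattern (class and fields are invented for illustration):
{{{
import java.io.{IOException, ObjectInputStream}

import scala.collection.mutable

// Hypothetical serializable class whose cache must be rebuilt after deserialization,
// mirroring what ApplicationInfo/DriverInfo/WorkerInfo do via Utils.tryOrIOException.
class CachedRecord(val id: String) extends Serializable {

  @transient private var cache: mutable.Map[String, Int] = _

  init()

  private def init(): Unit = {
    cache = mutable.Map.empty[String, Int]
  }

  private def readObject(in: ObjectInputStream): Unit = {
    try {
      in.defaultReadObject()
      init() // transient fields are null here unless re-initialized
    } catch {
      case ioe: IOException => throw ioe
      case e: Exception => throw new IOException(e)
    }
  }
}
}}}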
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala
index aa85aa060d9c1..6ff2aa5244847 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala
@@ -18,10 +18,12 @@
package org.apache.spark.deploy.master
import java.io._
-
-import akka.serialization.Serialization
+import java.nio.ByteBuffer
import org.apache.spark.Logging
+import org.apache.spark.serializer.Serializer
+
+import scala.reflect.ClassTag
/**
* Stores data in a single on-disk directory with one file per application and worker.
@@ -32,69 +34,47 @@ import org.apache.spark.Logging
*/
private[spark] class FileSystemPersistenceEngine(
val dir: String,
- val serialization: Serialization)
+ val serialization: Serializer)
extends PersistenceEngine with Logging {
+ val serializer = serialization.newInstance()
new File(dir).mkdir()
- override def addApplication(app: ApplicationInfo) {
- val appFile = new File(dir + File.separator + "app_" + app.id)
- serializeIntoFile(appFile, app)
- }
-
- override def removeApplication(app: ApplicationInfo) {
- new File(dir + File.separator + "app_" + app.id).delete()
- }
-
- override def addDriver(driver: DriverInfo) {
- val driverFile = new File(dir + File.separator + "driver_" + driver.id)
- serializeIntoFile(driverFile, driver)
- }
-
- override def removeDriver(driver: DriverInfo) {
- new File(dir + File.separator + "driver_" + driver.id).delete()
- }
-
- override def addWorker(worker: WorkerInfo) {
- val workerFile = new File(dir + File.separator + "worker_" + worker.id)
- serializeIntoFile(workerFile, worker)
+ override def persist(name: String, obj: Object): Unit = {
+ serializeIntoFile(new File(dir + File.separator + name), obj)
}
- override def removeWorker(worker: WorkerInfo) {
- new File(dir + File.separator + "worker_" + worker.id).delete()
+ override def unpersist(name: String): Unit = {
+ new File(dir + File.separator + name).delete()
}
- override def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = {
- val sortedFiles = new File(dir).listFiles().sortBy(_.getName)
- val appFiles = sortedFiles.filter(_.getName.startsWith("app_"))
- val apps = appFiles.map(deserializeFromFile[ApplicationInfo])
- val driverFiles = sortedFiles.filter(_.getName.startsWith("driver_"))
- val drivers = driverFiles.map(deserializeFromFile[DriverInfo])
- val workerFiles = sortedFiles.filter(_.getName.startsWith("worker_"))
- val workers = workerFiles.map(deserializeFromFile[WorkerInfo])
- (apps, drivers, workers)
+ override def read[T: ClassTag](prefix: String) = {
+ val files = new File(dir).listFiles().filter(_.getName.startsWith(prefix))
+ files.map(deserializeFromFile[T])
}
private def serializeIntoFile(file: File, value: AnyRef) {
val created = file.createNewFile()
if (!created) { throw new IllegalStateException("Could not create file: " + file) }
- val serializer = serialization.findSerializerFor(value)
- val serialized = serializer.toBinary(value)
+ val out = serializer.serializeStream(new FileOutputStream(file))
+ try {
+ out.writeObject(value)
+ } finally {
+ out.close()
+ }
- val out = new FileOutputStream(file)
- out.write(serialized)
- out.close()
}
- def deserializeFromFile[T](file: File)(implicit m: Manifest[T]): T = {
+ def deserializeFromFile[T](file: File)(implicit m: ClassTag[T]): T = {
val fileData = new Array[Byte](file.length().asInstanceOf[Int])
val dis = new DataInputStream(new FileInputStream(file))
- dis.readFully(fileData)
- dis.close()
+ try {
+ dis.readFully(fileData)
+ } finally {
+ dis.close()
+ }
- val clazz = m.runtimeClass.asInstanceOf[Class[T]]
- val serializer = serialization.serializerFor(clazz)
- serializer.fromBinary(fileData).asInstanceOf[T]
+ serializer.deserialize(ByteBuffer.wrap(fileData))
}
}
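With the engine reduced to persist/unpersist/read plus a pluggable Serializer, basic usage looks like the sketch below. This is a hedged example: the object name and the directory are placeholders, and it has to sit under the org.apache.spark package because the engine is private[spark].
{{{
package org.apache.spark.deploy.master

import org.apache.spark.SparkConf
import org.apache.spark.serializer.JavaSerializer

// Store a java-serializable value under a name, read it back by prefix, then delete it.
// This is exactly how the final addWorker/addApplication/addDriver helpers use the engine.
object FileSystemPersistenceSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    val engine = new FileSystemPersistenceEngine("/tmp/spark-recovery", new JavaSerializer(conf))

    engine.persist("worker_local-1", "placeholder worker record")
    println(engine.read[String]("worker_"))   // Seq("placeholder worker record")
    engine.unpersist("worker_local-1")
  }
}
}}}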
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala b/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala
index 4433a2ec29be6..cf77c86d760cf 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala
@@ -17,30 +17,27 @@
package org.apache.spark.deploy.master
-import akka.actor.{Actor, ActorRef}
-
-import org.apache.spark.deploy.master.MasterMessages.ElectedLeader
+import org.apache.spark.annotation.DeveloperApi
/**
- * A LeaderElectionAgent keeps track of whether the current Master is the leader, meaning it
- * is the only Master serving requests.
- * In addition to the API provided, the LeaderElectionAgent will use of the following messages
- * to inform the Master of leader changes:
- * [[org.apache.spark.deploy.master.MasterMessages.ElectedLeader ElectedLeader]]
- * [[org.apache.spark.deploy.master.MasterMessages.RevokedLeadership RevokedLeadership]]
+ * :: DeveloperApi ::
+ *
+ * A LeaderElectionAgent keeps track of the current master and is the common interface for all election agents.
*/
-private[spark] trait LeaderElectionAgent extends Actor {
- // TODO: LeaderElectionAgent does not necessary to be an Actor anymore, need refactoring.
- val masterActor: ActorRef
+@DeveloperApi
+trait LeaderElectionAgent {
+ val masterActor: LeaderElectable
+ def stop() {} // to avoid noops in implementations.
}
-/** Single-node implementation of LeaderElectionAgent -- we're initially and always the leader. */
-private[spark] class MonarchyLeaderAgent(val masterActor: ActorRef) extends LeaderElectionAgent {
- override def preStart() {
- masterActor ! ElectedLeader
- }
+@DeveloperApi
+trait LeaderElectable {
+ def electedLeader()
+ def revokedLeadership()
+}
- override def receive = {
- case _ =>
- }
+/** Single-node implementation of LeaderElectionAgent -- we're initially and always the leader. */
+private[spark] class MonarchyLeaderAgent(val masterActor: LeaderElectable)
+ extends LeaderElectionAgent {
+ masterActor.electedLeader()
}
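Since LeaderElectionAgent and LeaderElectable are now public @DeveloperApi traits, a custom agent only needs to hold a LeaderElectable and tell it when leadership changes. A minimal sketch (class name invented) that behaves like the private MonarchyLeaderAgent above:
{{{
import org.apache.spark.deploy.master.{LeaderElectable, LeaderElectionAgent}

// With no external coordinator the lone master claims leadership as soon as the agent is
// constructed; a real agent would call electedLeader()/revokedLeadership() from its
// election callbacks instead.
class SingleNodeLeaderAgent(val masterActor: LeaderElectable) extends LeaderElectionAgent {
  masterActor.electedLeader()

  // Nothing to clean up; the trait's default stop() would work just as well.
  override def stop(): Unit = ()
}
}}}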
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index a304102a49086..021454e25804c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -17,6 +17,7 @@
package org.apache.spark.deploy.master
+import java.net.URLEncoder
import java.text.SimpleDateFormat
import java.util.Date
@@ -30,25 +31,26 @@ import akka.actor._
import akka.pattern.ask
import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}
import akka.serialization.SerializationExtension
-import org.apache.hadoop.fs.FileSystem
import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException}
-import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState}
+import org.apache.spark.deploy.{ApplicationDescription, DriverDescription,
+ ExecutorState, SparkHadoopUtil}
import org.apache.spark.deploy.DeployMessages._
+import org.apache.spark.deploy.history.HistoryServer
import org.apache.spark.deploy.master.DriverState.DriverState
import org.apache.spark.deploy.master.MasterMessages._
import org.apache.spark.deploy.master.ui.MasterWebUI
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.scheduler.{EventLoggingListener, ReplayListenerBus}
import org.apache.spark.ui.SparkUI
-import org.apache.spark.util.{AkkaUtils, SignalLogger, Utils}
+import org.apache.spark.util.{ActorLogReceive, AkkaUtils, SignalLogger, Utils}
private[spark] class Master(
host: String,
port: Int,
webUiPort: Int,
val securityMgr: SecurityManager)
- extends Actor with Logging {
+ extends Actor with ActorLogReceive with Logging with LeaderElectable {
import context.dispatcher // to use Akka's scheduler.schedule()
@@ -57,8 +59,8 @@ private[spark] class Master(
def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
val WORKER_TIMEOUT = conf.getLong("spark.worker.timeout", 60) * 1000
val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200)
+ val RETAINED_DRIVERS = conf.getInt("spark.deploy.retainedDrivers", 200)
val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15)
- val RECOVERY_DIR = conf.get("spark.deploy.recoveryDirectory", "")
val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE")
val workers = new HashSet[WorkerInfo]
@@ -72,9 +74,7 @@ private[spark] class Master(
val waitingApps = new ArrayBuffer[ApplicationInfo]
val completedApps = new ArrayBuffer[ApplicationInfo]
var nextAppNumber = 0
-
val appIdToUI = new HashMap[String, SparkUI]
- val fileSystemsUsed = new HashSet[FileSystem]
val drivers = new HashSet[DriverInfo]
val completedDrivers = new ArrayBuffer[DriverInfo]
@@ -102,7 +102,7 @@ private[spark] class Master(
var persistenceEngine: PersistenceEngine = _
- var leaderElectionAgent: ActorRef = _
+ var leaderElectionAgent: LeaderElectionAgent = _
private var recoveryCompletionTask: Cancellable = _
@@ -129,23 +129,24 @@ private[spark] class Master(
masterMetricsSystem.start()
applicationMetricsSystem.start()
- persistenceEngine = RECOVERY_MODE match {
+ val (persistenceEngine_, leaderElectionAgent_) = RECOVERY_MODE match {
case "ZOOKEEPER" =>
logInfo("Persisting recovery state to ZooKeeper")
- new ZooKeeperPersistenceEngine(SerializationExtension(context.system), conf)
+ val zkFactory = new ZooKeeperRecoveryModeFactory(conf)
+ (zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this))
case "FILESYSTEM" =>
- logInfo("Persisting recovery state to directory: " + RECOVERY_DIR)
- new FileSystemPersistenceEngine(RECOVERY_DIR, SerializationExtension(context.system))
+ val fsFactory = new FileSystemRecoveryModeFactory(conf)
+ (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this))
+ case "CUSTOM" =>
+ val clazz = Class.forName(conf.get("spark.deploy.recoveryMode.factory"))
+ val factory = clazz.getConstructor(conf.getClass)
+ .newInstance(conf).asInstanceOf[StandaloneRecoveryModeFactory]
+ (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this))
case _ =>
- new BlackHolePersistenceEngine()
+ (new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this))
}
-
- leaderElectionAgent = RECOVERY_MODE match {
- case "ZOOKEEPER" =>
- context.actorOf(Props(classOf[ZooKeeperLeaderElectionAgent], self, masterUrl, conf))
- case _ =>
- context.actorOf(Props(classOf[MonarchyLeaderAgent], self))
- }
+ persistenceEngine = persistenceEngine_
+ leaderElectionAgent = leaderElectionAgent_
}
override def preRestart(reason: Throwable, message: Option[Any]) {
@@ -154,19 +155,28 @@ private[spark] class Master(
}
override def postStop() {
+ masterMetricsSystem.report()
+ applicationMetricsSystem.report()
// prevent the CompleteRecovery message sending to restarted master
if (recoveryCompletionTask != null) {
recoveryCompletionTask.cancel()
}
webUi.stop()
- fileSystemsUsed.foreach(_.close())
masterMetricsSystem.stop()
applicationMetricsSystem.stop()
persistenceEngine.close()
- context.stop(leaderElectionAgent)
+ leaderElectionAgent.stop()
+ }
+
+ override def electedLeader() {
+ self ! ElectedLeader
+ }
+
+ override def revokedLeadership() {
+ self ! RevokedLeadership
}
- override def receive = {
+ override def receiveWithLogging = {
case ElectedLeader => {
val (storedApps, storedDrivers, storedWorkers) = persistenceEngine.readPersistedData()
state = if (storedApps.isEmpty && storedDrivers.isEmpty && storedWorkers.isEmpty) {
@@ -294,28 +304,34 @@ private[spark] class Master(
val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
execOption match {
case Some(exec) => {
+ val appInfo = idToApp(appId)
exec.state = state
+ if (state == ExecutorState.RUNNING) { appInfo.resetRetryCount() }
exec.application.driver ! ExecutorUpdated(execId, state, message, exitStatus)
if (ExecutorState.isFinished(state)) {
- val appInfo = idToApp(appId)
// Remove this executor from the worker and app
- logInfo("Removing executor " + exec.fullId + " because it is " + state)
+ logInfo(s"Removing executor ${exec.fullId} because it is $state")
appInfo.removeExecutor(exec)
exec.worker.removeExecutor(exec)
- val normalExit = exitStatus.exists(_ == 0)
+ val normalExit = exitStatus == Some(0)
// Only retry certain number of times so we don't go into an infinite loop.
- if (!normalExit && appInfo.incrementRetryCount < ApplicationState.MAX_NUM_RETRY) {
- schedule()
- } else if (!normalExit) {
- logError("Application %s with ID %s failed %d times, removing it".format(
- appInfo.desc.name, appInfo.id, appInfo.retryCount))
- removeApplication(appInfo, ApplicationState.FAILED)
+ if (!normalExit) {
+ if (appInfo.incrementRetryCount() < ApplicationState.MAX_NUM_RETRY) {
+ schedule()
+ } else {
+ val execs = appInfo.executors.values
+ if (!execs.exists(_.state == ExecutorState.RUNNING)) {
+ logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
+ s"${appInfo.retryCount} times; removing it")
+ removeApplication(appInfo, ApplicationState.FAILED)
+ }
+ }
}
}
}
case None =>
- logWarning("Got status update for unknown executor " + appId + "/" + execId)
+ logWarning(s"Got status update for unknown executor $appId/$execId")
}
}
@@ -333,7 +349,14 @@ private[spark] class Master(
case Some(workerInfo) =>
workerInfo.lastHeartbeat = System.currentTimeMillis()
case None =>
- logWarning("Got heartbeat from unregistered worker " + workerId)
+ if (workers.map(_.id).contains(workerId)) {
+ logWarning(s"Got heartbeat from unregistered worker $workerId." +
+ " Asking it to re-register.")
+ sender ! ReconnectWorker(masterUrl)
+ } else {
+ logWarning(s"Got heartbeat from unregistered worker $workerId." +
+ " This worker was never registered, so ignoring the heartbeat.")
+ }
}
}
@@ -479,13 +502,26 @@ private[spark] class Master(
if (state != RecoveryState.ALIVE) { return }
// First schedule drivers, they take strict precedence over applications
- val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
- for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
- for (driver <- List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers
+ // Randomization helps balance drivers
+ val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
+ val numWorkersAlive = shuffledAliveWorkers.size
+ var curPos = 0
+
+ for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
+ // We assign workers to each waiting driver in a round-robin fashion. For each driver, we
+ // start from the last worker that was assigned a driver, and continue onwards until we have
+ // explored all alive workers.
+ var launched = false
+ var numWorkersVisited = 0
+ while (numWorkersVisited < numWorkersAlive && !launched) {
+ val worker = shuffledAliveWorkers(curPos)
+ numWorkersVisited += 1
if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
launchDriver(worker, driver)
waitingDrivers -= driver
+ launched = true
}
+ curPos = (curPos + 1) % numWorkersAlive
}
}
@@ -644,10 +680,7 @@ private[spark] class Master(
waitingApps -= app
// If application events are logged, use them to rebuild the UI
- if (!rebuildSparkUI(app)) {
- // Avoid broken links if the UI is not reconstructed
- app.desc.appUiUrl = ""
- }
+ rebuildSparkUI(app)
for (exec <- app.executors.values) {
exec.worker.removeExecutor(exec)
@@ -669,29 +702,52 @@ private[spark] class Master(
*/
def rebuildSparkUI(app: ApplicationInfo): Boolean = {
val appName = app.desc.name
- val eventLogDir = app.desc.eventLogDir.getOrElse { return false }
- val fileSystem = Utils.getHadoopFileSystem(eventLogDir)
- val eventLogInfo = EventLoggingListener.parseLoggingInfo(eventLogDir, fileSystem)
+ val notFoundBasePath = HistoryServer.UI_PATH_PREFIX + "/not-found"
+ val eventLogDir = app.desc.eventLogDir.getOrElse {
+ // Event logging is not enabled for this application
+ app.desc.appUiUrl = notFoundBasePath
+ return false
+ }
+
+ val appEventLogDir = EventLoggingListener.getLogDirPath(eventLogDir, app.id)
+ val fileSystem = Utils.getHadoopFileSystem(appEventLogDir,
+ SparkHadoopUtil.get.newConfiguration(conf))
+ val eventLogInfo = EventLoggingListener.parseLoggingInfo(appEventLogDir, fileSystem)
val eventLogPaths = eventLogInfo.logPaths
val compressionCodec = eventLogInfo.compressionCodec
- if (!eventLogPaths.isEmpty) {
- try {
- val replayBus = new ReplayListenerBus(eventLogPaths, fileSystem, compressionCodec)
- val ui = new SparkUI(
- new SparkConf, replayBus, appName + " (completed)", "/history/" + app.id)
- replayBus.replay()
- app.desc.appUiUrl = ui.basePath
- appIdToUI(app.id) = ui
- webUi.attachSparkUI(ui)
- return true
- } catch {
- case e: Exception =>
- logError("Exception in replaying log for application %s (%s)".format(appName, app.id), e)
- }
- } else {
- logWarning("Application %s (%s) has no valid logs: %s".format(appName, app.id, eventLogDir))
+
+ if (eventLogPaths.isEmpty) {
+ // Event logging is enabled for this application, but no event logs are found
+ val title = s"Application history not found (${app.id})"
+ var msg = s"No event logs found for application $appName in $appEventLogDir."
+ logWarning(msg)
+ msg += " Did you specify the correct logging directory?"
+ msg = URLEncoder.encode(msg, "UTF-8")
+ app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title"
+ return false
+ }
+
+ try {
+ val replayBus = new ReplayListenerBus(eventLogPaths, fileSystem, compressionCodec)
+ val ui = SparkUI.createHistoryUI(new SparkConf, replayBus, new SecurityManager(conf),
+ appName + " (completed)", HistoryServer.UI_PATH_PREFIX + s"/${app.id}")
+ replayBus.replay()
+ appIdToUI(app.id) = ui
+ webUi.attachSparkUI(ui)
+ // Application UI is successfully rebuilt, so link the Master UI to it
+ app.desc.appUiUrl = ui.getBasePath
+ true
+ } catch {
+ case e: Exception =>
+ // Relay exception message to application UI page
+ val title = s"Application history load error (${app.id})"
+ val exception = URLEncoder.encode(Utils.exceptionString(e), "UTF-8")
+ var msg = s"Exception in replaying log for application $appName!"
+ logError(msg, e)
+ msg = URLEncoder.encode(msg, "UTF-8")
+ app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&exception=$exception&title=$title"
+ false
}
- false
}
/** Generate a new app ID given a app's submission date */
@@ -744,11 +800,16 @@ private[spark] class Master(
case Some(driver) =>
logInfo(s"Removing driver: $driverId")
drivers -= driver
+ if (completedDrivers.size >= RETAINED_DRIVERS) {
+ val toRemove = math.max(RETAINED_DRIVERS / 10, 1)
+ completedDrivers.trimStart(toRemove)
+ }
completedDrivers += driver
persistenceEngine.removeDriver(driver)
driver.state = finalState
driver.exception = exception
driver.worker.foreach(w => w.removeDriver(driver))
+ schedule()
case None =>
logWarning(s"Asked to remove unknown driver: $driverId")
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala
index a87781fb93850..e34bee7854292 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala
@@ -27,6 +27,7 @@ private[spark] class MasterArguments(args: Array[String], conf: SparkConf) {
var host = Utils.localHostName()
var port = 7077
var webUiPort = 8080
+ var propertiesFile: String = null
// Check for settings in environment variables
if (System.getenv("SPARK_MASTER_HOST") != null) {
@@ -38,12 +39,16 @@ private[spark] class MasterArguments(args: Array[String], conf: SparkConf) {
if (System.getenv("SPARK_MASTER_WEBUI_PORT") != null) {
webUiPort = System.getenv("SPARK_MASTER_WEBUI_PORT").toInt
}
- if (conf.contains("master.ui.port")) {
- webUiPort = conf.get("master.ui.port").toInt
- }
parse(args.toList)
+ // This mutates the SparkConf, so all accesses to it must be made after this line
+ propertiesFile = Utils.loadDefaultSparkProperties(conf, propertiesFile)
+
+ if (conf.contains("spark.master.ui.port")) {
+ webUiPort = conf.get("spark.master.ui.port").toInt
+ }
+
def parse(args: List[String]): Unit = args match {
case ("--ip" | "-i") :: value :: tail =>
Utils.checkHost(value, "ip no longer supported, please use hostname " + value)
@@ -63,7 +68,11 @@ private[spark] class MasterArguments(args: Array[String], conf: SparkConf) {
webUiPort = value
parse(tail)
- case ("--help" | "-h") :: tail =>
+ case ("--properties-file") :: value :: tail =>
+ propertiesFile = value
+ parse(tail)
+
+ case ("--help") :: tail =>
printUsageAndExit(0)
case Nil => {}
@@ -83,7 +92,9 @@ private[spark] class MasterArguments(args: Array[String], conf: SparkConf) {
" -i HOST, --ip HOST Hostname to listen on (deprecated, please use --host or -h) \n" +
" -h HOST, --host HOST Hostname to listen on\n" +
" -p PORT, --port PORT Port to listen on (default: 7077)\n" +
- " --webui-port PORT Port for web UI (default: 8080)")
+ " --webui-port PORT Port for web UI (default: 8080)\n" +
+ " --properties-file FILE Path to a custom Spark properties file.\n" +
+ " Default is conf/spark-defaults.conf.")
System.exit(exitCode)
}
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala
index 36c1b87b7f684..9c3f79f1244b7 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala
@@ -22,8 +22,8 @@ import com.codahale.metrics.{Gauge, MetricRegistry}
import org.apache.spark.metrics.source.Source
private[spark] class MasterSource(val master: Master) extends Source {
- val metricRegistry = new MetricRegistry()
- val sourceName = "master"
+ override val metricRegistry = new MetricRegistry()
+ override val sourceName = "master"
// Gauge for worker numbers in cluster
metricRegistry.register(MetricRegistry.name("workers"), new Gauge[Int] {
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala
index e3640ea4f7e64..2e0e1e7036ac8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala
@@ -17,6 +17,10 @@
package org.apache.spark.deploy.master
+import org.apache.spark.annotation.DeveloperApi
+
+import scala.reflect.ClassTag
+
/**
* Allows Master to persist any state that is necessary in order to recover from a failure.
* The following semantics are required:
@@ -25,36 +29,70 @@ package org.apache.spark.deploy.master
* Given these two requirements, we will have all apps and workers persisted, but
* we might not have yet deleted apps or workers that finished (so their liveness must be verified
* during recovery).
+ *
+ * The implementation of this trait defines how name-object pairs are stored or retrieved.
*/
-private[spark] trait PersistenceEngine {
- def addApplication(app: ApplicationInfo)
+@DeveloperApi
+trait PersistenceEngine {
- def removeApplication(app: ApplicationInfo)
+ /**
+ * Defines how the object is serialized and persisted. Implementation will
+ * depend on the store used.
+ */
+ def persist(name: String, obj: Object)
- def addWorker(worker: WorkerInfo)
+ /**
+ * Defines how the object referred by its name is removed from the store.
+ */
+ def unpersist(name: String)
- def removeWorker(worker: WorkerInfo)
+ /**
+ * Returns all objects whose names match the given prefix. This defines how objects are
+ * read/deserialized back.
+ */
+ def read[T: ClassTag](prefix: String): Seq[T]
- def addDriver(driver: DriverInfo)
+ final def addApplication(app: ApplicationInfo): Unit = {
+ persist("app_" + app.id, app)
+ }
- def removeDriver(driver: DriverInfo)
+ final def removeApplication(app: ApplicationInfo): Unit = {
+ unpersist("app_" + app.id)
+ }
+
+ final def addWorker(worker: WorkerInfo): Unit = {
+ persist("worker_" + worker.id, worker)
+ }
+
+ final def removeWorker(worker: WorkerInfo): Unit = {
+ unpersist("worker_" + worker.id)
+ }
+
+ final def addDriver(driver: DriverInfo): Unit = {
+ persist("driver_" + driver.id, driver)
+ }
+
+ final def removeDriver(driver: DriverInfo): Unit = {
+ unpersist("driver_" + driver.id)
+ }
/**
* Returns the persisted data sorted by their respective ids (which implies that they're
* sorted by time of creation).
*/
- def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo])
+ final def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = {
+ (read[ApplicationInfo]("app_"), read[DriverInfo]("driver_"), read[WorkerInfo]("worker_"))
+ }
def close() {}
}
private[spark] class BlackHolePersistenceEngine extends PersistenceEngine {
- override def addApplication(app: ApplicationInfo) {}
- override def removeApplication(app: ApplicationInfo) {}
- override def addWorker(worker: WorkerInfo) {}
- override def removeWorker(worker: WorkerInfo) {}
- override def addDriver(driver: DriverInfo) {}
- override def removeDriver(driver: DriverInfo) {}
-
- override def readPersistedData() = (Nil, Nil, Nil)
+
+ override def persist(name: String, obj: Object): Unit = {}
+
+ override def unpersist(name: String): Unit = {}
+
+ override def read[T: ClassTag](name: String): Seq[T] = Nil
+
}
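Because addApplication, removeWorker, readPersistedData and the other helpers are now final methods built on persist/unpersist/read, a concrete store only has to supply those three primitives. A minimal in-memory sketch (class name invented), for example for tests:
{{{
import scala.collection.mutable
import scala.reflect.ClassTag

import org.apache.spark.deploy.master.PersistenceEngine

// Keeps name -> object pairs in a map; read() filters by the "app_"/"driver_"/"worker_"
// prefixes that the trait's final helper methods use.
class InMemoryPersistenceEngine extends PersistenceEngine {

  private val store = mutable.LinkedHashMap.empty[String, Object]

  override def persist(name: String, obj: Object): Unit = store(name) = obj

  override def unpersist(name: String): Unit = store -= name

  override def read[T: ClassTag](prefix: String): Seq[T] =
    store.iterator.collect {
      case (name, obj) if name.startsWith(prefix) => obj.asInstanceOf[T]
    }.toSeq
}
}}}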
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala
new file mode 100644
index 0000000000000..d9d36c1ed5f9f
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.master
+
+import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.serializer.JavaSerializer
+
+/**
+ * :: DeveloperApi ::
+ *
+ * Implementations of this class can be plugged in as an alternative recovery mode for Spark's
+ * standalone deploy mode.
+ *
+ */
+@DeveloperApi
+abstract class StandaloneRecoveryModeFactory(conf: SparkConf) {
+
+ /**
+ * PersistenceEngine defines how the persistent data (information about workers, drivers, etc.)
+ * is handled for recovery.
+ *
+ */
+ def createPersistenceEngine(): PersistenceEngine
+
+ /**
+ * Create an instance of LeaderElectionAgent that decides who gets elected as master.
+ */
+ def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent
+}
+
+/**
+ * The leader election agent in this case is a no-op: the single master is always the leader,
+ * since the actual recovery is done by restoring state from the filesystem.
+ */
+private[spark] class FileSystemRecoveryModeFactory(conf: SparkConf)
+ extends StandaloneRecoveryModeFactory(conf) with Logging {
+ val RECOVERY_DIR = conf.get("spark.deploy.recoveryDirectory", "")
+
+ def createPersistenceEngine() = {
+ logInfo("Persisting recovery state to directory: " + RECOVERY_DIR)
+ new FileSystemPersistenceEngine(RECOVERY_DIR, new JavaSerializer(conf))
+ }
+
+ def createLeaderElectionAgent(master: LeaderElectable) = new MonarchyLeaderAgent(master)
+}
+
+private[spark] class ZooKeeperRecoveryModeFactory(conf: SparkConf)
+ extends StandaloneRecoveryModeFactory(conf) {
+ def createPersistenceEngine() = new ZooKeeperPersistenceEngine(new JavaSerializer(conf), conf)
+
+ def createLeaderElectionAgent(master: LeaderElectable) =
+ new ZooKeeperLeaderElectionAgent(master, conf)
+}
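The CUSTOM branch added to Master above reads `spark.deploy.recoveryMode.factory` and reflectively calls a one-argument SparkConf constructor on that class. A hedged sketch of a third-party factory, wiring in the hypothetical InMemoryPersistenceEngine and SingleNodeLeaderAgent sketched earlier (none of these class names are Spark classes):
{{{
import org.apache.spark.SparkConf
import org.apache.spark.deploy.master.{LeaderElectable, LeaderElectionAgent,
  PersistenceEngine, StandaloneRecoveryModeFactory}

// The one-argument SparkConf constructor is what Master's CUSTOM branch looks up.
class InMemoryRecoveryModeFactory(conf: SparkConf) extends StandaloneRecoveryModeFactory(conf) {

  // Where and how recovery state is stored.
  def createPersistenceEngine(): PersistenceEngine = new InMemoryPersistenceEngine()

  // Who gets to be leader; a single-node agent suffices when there is no shared store.
  def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent =
    new SingleNodeLeaderAgent(master)
}
}}}
With such a class on the master's classpath, it would be selected by setting `spark.deploy.recoveryMode=CUSTOM` and `spark.deploy.recoveryMode.factory` to the fully qualified factory class name, matching the CUSTOM case in Master above.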
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala
index c5fa9cf7d7c2d..473ddc23ff0f3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala
@@ -21,6 +21,7 @@ import scala.collection.mutable
import akka.actor.ActorRef
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.util.Utils
private[spark] class WorkerInfo(
@@ -50,7 +51,7 @@ private[spark] class WorkerInfo(
def coresFree: Int = cores - coresUsed
def memoryFree: Int = memory - memoryUsed
- private def readObject(in: java.io.ObjectInputStream) : Unit = {
+ private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException {
in.defaultReadObject()
init()
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala
index 285f9b014e291..8eaa0ad948519 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala
@@ -24,9 +24,8 @@ import org.apache.spark.deploy.master.MasterMessages._
import org.apache.curator.framework.CuratorFramework
import org.apache.curator.framework.recipes.leader.{LeaderLatchListener, LeaderLatch}
-private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef,
- masterUrl: String, conf: SparkConf)
- extends LeaderElectionAgent with LeaderLatchListener with Logging {
+private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: LeaderElectable,
+ conf: SparkConf) extends LeaderLatchListener with LeaderElectionAgent with Logging {
val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/leader_election"
@@ -34,30 +33,21 @@ private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef,
private var leaderLatch: LeaderLatch = _
private var status = LeadershipStatus.NOT_LEADER
- override def preStart() {
+ start()
+ def start() {
logInfo("Starting ZooKeeper LeaderElection agent")
zk = SparkCuratorUtil.newClient(conf)
leaderLatch = new LeaderLatch(zk, WORKING_DIR)
leaderLatch.addListener(this)
-
leaderLatch.start()
}
- override def preRestart(reason: scala.Throwable, message: scala.Option[scala.Any]) {
- logError("LeaderElectionAgent failed...", reason)
- super.preRestart(reason, message)
- }
-
- override def postStop() {
+ override def stop() {
leaderLatch.close()
zk.close()
}
- override def receive = {
- case _ =>
- }
-
override def isLeader() {
synchronized {
// could have lost leadership by now.
@@ -85,10 +75,10 @@ private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef,
def updateLeadershipStatus(isLeader: Boolean) {
if (isLeader && status == LeadershipStatus.NOT_LEADER) {
status = LeadershipStatus.LEADER
- masterActor ! ElectedLeader
+ masterActor.electedLeader()
} else if (!isLeader && status == LeadershipStatus.LEADER) {
status = LeadershipStatus.NOT_LEADER
- masterActor ! RevokedLeadership
+ masterActor.revokedLeadership()
}
}
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala
index 834dfedee52ce..96c2139eb02f0 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala
@@ -19,72 +19,54 @@ package org.apache.spark.deploy.master
import scala.collection.JavaConversions._
-import akka.serialization.Serialization
import org.apache.curator.framework.CuratorFramework
import org.apache.zookeeper.CreateMode
import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.serializer.Serializer
+import java.nio.ByteBuffer
-class ZooKeeperPersistenceEngine(serialization: Serialization, conf: SparkConf)
+import scala.reflect.ClassTag
+
+
+private[spark] class ZooKeeperPersistenceEngine(val serialization: Serializer, conf: SparkConf)
extends PersistenceEngine
with Logging
{
val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/master_status"
val zk: CuratorFramework = SparkCuratorUtil.newClient(conf)
- SparkCuratorUtil.mkdir(zk, WORKING_DIR)
-
- override def addApplication(app: ApplicationInfo) {
- serializeIntoFile(WORKING_DIR + "/app_" + app.id, app)
- }
+ val serializer = serialization.newInstance()
- override def removeApplication(app: ApplicationInfo) {
- zk.delete().forPath(WORKING_DIR + "/app_" + app.id)
- }
+ SparkCuratorUtil.mkdir(zk, WORKING_DIR)
- override def addDriver(driver: DriverInfo) {
- serializeIntoFile(WORKING_DIR + "/driver_" + driver.id, driver)
- }
- override def removeDriver(driver: DriverInfo) {
- zk.delete().forPath(WORKING_DIR + "/driver_" + driver.id)
+ override def persist(name: String, obj: Object): Unit = {
+ serializeIntoFile(WORKING_DIR + "/" + name, obj)
}
- override def addWorker(worker: WorkerInfo) {
- serializeIntoFile(WORKING_DIR + "/worker_" + worker.id, worker)
+ override def unpersist(name: String): Unit = {
+ zk.delete().forPath(WORKING_DIR + "/" + name)
}
- override def removeWorker(worker: WorkerInfo) {
- zk.delete().forPath(WORKING_DIR + "/worker_" + worker.id)
+ override def read[T: ClassTag](prefix: String) = {
+ val file = zk.getChildren.forPath(WORKING_DIR).filter(_.startsWith(prefix))
+ file.map(deserializeFromFile[T]).flatten
}
override def close() {
zk.close()
}
- override def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = {
- val sortedFiles = zk.getChildren().forPath(WORKING_DIR).toList.sorted
- val appFiles = sortedFiles.filter(_.startsWith("app_"))
- val apps = appFiles.map(deserializeFromFile[ApplicationInfo]).flatten
- val driverFiles = sortedFiles.filter(_.startsWith("driver_"))
- val drivers = driverFiles.map(deserializeFromFile[DriverInfo]).flatten
- val workerFiles = sortedFiles.filter(_.startsWith("worker_"))
- val workers = workerFiles.map(deserializeFromFile[WorkerInfo]).flatten
- (apps, drivers, workers)
- }
-
private def serializeIntoFile(path: String, value: AnyRef) {
- val serializer = serialization.findSerializerFor(value)
- val serialized = serializer.toBinary(value)
- zk.create().withMode(CreateMode.PERSISTENT).forPath(path, serialized)
+ val serialized = serializer.serialize(value)
+ zk.create().withMode(CreateMode.PERSISTENT).forPath(path, serialized.array())
}
- def deserializeFromFile[T](filename: String)(implicit m: Manifest[T]): Option[T] = {
+ def deserializeFromFile[T](filename: String)(implicit m: ClassTag[T]): Option[T] = {
val fileData = zk.getData().forPath(WORKING_DIR + "/" + filename)
- val clazz = m.runtimeClass.asInstanceOf[Class[T]]
- val serializer = serialization.serializerFor(clazz)
try {
- Some(serializer.fromBinary(fileData).asInstanceOf[T])
+ Some(serializer.deserialize(ByteBuffer.wrap(fileData)))
} catch {
case e: Exception => {
logWarning("Exception while reading persisted file, deleting", e)
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
index 34fa1429c86de..4588c130ef439 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
@@ -28,7 +28,7 @@ import org.json4s.JValue
import org.apache.spark.deploy.{ExecutorState, JsonProtocol}
import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
import org.apache.spark.deploy.master.ExecutorInfo
-import org.apache.spark.ui.{WebUIPage, UIUtils}
+import org.apache.spark.ui.{UIUtils, WebUIPage}
import org.apache.spark.util.Utils
private[spark] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") {
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/HistoryNotFoundPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/HistoryNotFoundPage.scala
new file mode 100644
index 0000000000000..d8daff3e7fb9c
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/HistoryNotFoundPage.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.master.ui
+
+import java.net.URLDecoder
+import javax.servlet.http.HttpServletRequest
+
+import scala.xml.Node
+
+import org.apache.spark.ui.{UIUtils, WebUIPage}
+
+private[spark] class HistoryNotFoundPage(parent: MasterWebUI)
+ extends WebUIPage("history/not-found") {
+
+ /**
+ * Render a page that conveys failure in loading application history.
+ *
+ * This accepts 3 HTTP parameters:
+ * msg = message to display to the user
+ * title = title of the page
+ * exception = detailed description of the exception in loading application history (if any)
+ *
+ * Parameters "msg" and "exception" are assumed to be UTF-8 encoded.
+ */
+ def render(request: HttpServletRequest): Seq[Node] = {
+ val titleParam = request.getParameter("title")
+ val msgParam = request.getParameter("msg")
+ val exceptionParam = request.getParameter("exception")
+
+ // If no parameters are specified, assume the user did not enable event logging
+ val defaultTitle = "Event logging is not enabled"
+ val defaultContent =
+   <div class="row-fluid">
+     <div class="span12" style="font-size:14px">
+       No event logs were found for this application! To
+       <a href="http://spark.apache.org/docs/latest/monitoring.html">enable event logging</a>,
+       set <span style="font-style:italic">spark.eventLog.enabled</span> to true and
+       <span style="font-style:italic">spark.eventLog.dir</span> to the directory to which your
+       event logs are written.
+     </div>
+   </div>
+
+ val title = Option(titleParam).getOrElse(defaultTitle)
+ val content = Option(msgParam)
+   .map { msg => URLDecoder.decode(msg, "UTF-8") }
+   .map { msg =>
+     <div class="row-fluid">
+       <div class="span12" style="font-size:14px">{msg}</div>
+     </div> ++
+     Option(exceptionParam)
+       .map { e => URLDecoder.decode(e, "UTF-8") }
+       .map { e => <pre>{e}</pre> }
+       .getOrElse(Seq.empty)
+   }.getOrElse(defaultContent)
+
+ UIUtils.basicSparkPage(content, title)
+ }
+}
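For reference, the link that lands on this page is assembled by the caller; Master.rebuildSparkUI in this patch builds the same kind of URL under HistoryServer.UI_PATH_PREFIX ("/history"). A hedged sketch (object name and values are invented):
{{{
import java.net.URLEncoder

// Builds a /history/not-found URL. Only "msg" (and "exception", if present) are
// URL-decoded a second time by the page itself; the title is read back as-is.
object HistoryNotFoundLink {
  def apply(appId: String, reason: String): String = {
    val title = URLEncoder.encode(s"Application history not found ($appId)", "UTF-8")
    val msg = URLEncoder.encode(reason, "UTF-8")
    s"/history/not-found?msg=$msg&title=$title"
  }

  def main(args: Array[String]): Unit =
    println(apply("app-20140801120000-0001", "No event logs found for this application."))
}
}}}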
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala
index a18b39fc95d64..d86ec1e03e45c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala
@@ -21,14 +21,14 @@ import org.apache.spark.Logging
import org.apache.spark.deploy.master.Master
import org.apache.spark.ui.{SparkUI, WebUI}
import org.apache.spark.ui.JettyUtils._
-import org.apache.spark.util.{AkkaUtils, Utils}
+import org.apache.spark.util.AkkaUtils
/**
* Web UI server for the standalone master.
*/
private[spark]
class MasterWebUI(val master: Master, requestedPort: Int)
- extends WebUI(master.securityMgr, requestedPort, master.conf) with Logging {
+ extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging {
val masterActorRef = master.self
val timeout = AkkaUtils.askTimeout(master.conf)
@@ -38,6 +38,7 @@ class MasterWebUI(val master: Master, requestedPort: Int)
/** Initialize all components of the server. */
def initialize() {
attachPage(new ApplicationPage(this))
+ attachPage(new HistoryNotFoundPage(this))
attachPage(new MasterPage(this))
attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static"))
master.masterMetricsSystem.getServletHandlers.foreach(attachHandler)
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala
index 4af5bc3afad6c..28e9662db5da9 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala
@@ -20,6 +20,8 @@ package org.apache.spark.deploy.worker
import java.io.{File, FileOutputStream, InputStream, IOException}
import java.lang.System._
+import scala.collection.Map
+
import org.apache.spark.Logging
import org.apache.spark.deploy.Command
import org.apache.spark.util.Utils
@@ -29,8 +31,30 @@ import org.apache.spark.util.Utils
*/
private[spark]
object CommandUtils extends Logging {
- def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = {
- val runner = getEnv("JAVA_HOME", command).map(_ + "/bin/java").getOrElse("java")
+
+ /**
+ * Build a ProcessBuilder based on the given parameters.
+ * The `env` argument is exposed for testing.
+ */
+ def buildProcessBuilder(
+ command: Command,
+ memory: Int,
+ sparkHome: String,
+ substituteArguments: String => String,
+ classPaths: Seq[String] = Seq[String](),
+ env: Map[String, String] = sys.env): ProcessBuilder = {
+ val localCommand = buildLocalCommand(command, substituteArguments, classPaths, env)
+ val commandSeq = buildCommandSeq(localCommand, memory, sparkHome)
+ val builder = new ProcessBuilder(commandSeq: _*)
+ val environment = builder.environment()
+ for ((key, value) <- localCommand.environment) {
+ environment.put(key, value)
+ }
+ builder
+ }
+
+ private def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = {
+ val runner = sys.env.get("JAVA_HOME").map(_ + "/bin/java").getOrElse("java")
// SPARK-698: do not call the run.cmd script, as process.destroy()
// fails to kill a process tree on Windows
@@ -38,16 +62,42 @@ object CommandUtils extends Logging {
command.arguments
}
- private def getEnv(key: String, command: Command): Option[String] =
- command.environment.get(key).orElse(Option(System.getenv(key)))
+ /**
+ * Build a command based on the given one, taking into account the local environment
+ * in which the command will run, substituting any placeholders and appending
+ * any extra class paths.
+ */
+ private def buildLocalCommand(
+ command: Command,
+ substituteArguments: String => String,
+ classPath: Seq[String] = Seq[String](),
+ env: Map[String, String]): Command = {
+ val libraryPathName = Utils.libraryPathEnvName
+ val libraryPathEntries = command.libraryPathEntries
+ val cmdLibraryPath = command.environment.get(libraryPathName)
+
+ val newEnvironment = if (libraryPathEntries.nonEmpty && libraryPathName.nonEmpty) {
+ val libraryPaths = libraryPathEntries ++ cmdLibraryPath ++ env.get(libraryPathName)
+ command.environment + ((libraryPathName, libraryPaths.mkString(File.pathSeparator)))
+ } else {
+ command.environment
+ }
+
+ Command(
+ command.mainClass,
+ command.arguments.map(substituteArguments),
+ newEnvironment,
+ command.classPathEntries ++ classPath,
+ Seq[String](), // library path already captured in environment variable
+ command.javaOpts)
+ }
/**
* Attention: this must always be aligned with the environment variables in the run scripts and
* the way the JAVA_OPTS are assembled there.
*/
- def buildJavaOpts(command: Command, memory: Int, sparkHome: String): Seq[String] = {
+ private def buildJavaOpts(command: Command, memory: Int, sparkHome: String): Seq[String] = {
val memoryOpts = Seq(s"-Xms${memory}M", s"-Xmx${memory}M")
- val extraOpts = command.extraJavaOptions.map(Utils.splitCommandString).getOrElse(Seq())
// Exists for backwards compatibility with older Spark versions
val workerLocalOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString)
@@ -57,25 +107,17 @@ object CommandUtils extends Logging {
logWarning("Set SPARK_LOCAL_DIRS for node-specific storage locations.")
}
- val libraryOpts =
- if (command.libraryPathEntries.size > 0) {
- val joined = command.libraryPathEntries.mkString(File.pathSeparator)
- Seq(s"-Djava.library.path=$joined")
- } else {
- Seq()
- }
-
- val permGenOpt = Seq("-XX:MaxPermSize=128m")
-
// Figure out our classpath with the external compute-classpath script
val ext = if (System.getProperty("os.name").startsWith("Windows")) ".cmd" else ".sh"
val classPath = Utils.executeAndGetOutput(
Seq(sparkHome + "/bin/compute-classpath" + ext),
- extraEnvironment=command.environment)
+ extraEnvironment = command.environment)
val userClassPath = command.classPathEntries ++ Seq(classPath)
+ val javaVersion = System.getProperty("java.version")
+ val permGenOpt = if (!javaVersion.startsWith("1.8")) Some("-XX:MaxPermSize=128m") else None
Seq("-cp", userClassPath.filterNot(_.isEmpty).mkString(File.pathSeparator)) ++
- permGenOpt ++ libraryOpts ++ extraOpts ++ workerLocalOpts ++ memoryOpts
+ permGenOpt ++ workerLocalOpts ++ command.javaOpts ++ memoryOpts
}
/** Spawn a thread that will redirect a given stream to a file */
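Both runners below now funnel through buildProcessBuilder. A hedged usage sketch follows: all values are placeholders, running it needs a real Spark distribution because the classpath is computed via bin/compute-classpath, and the object must live under org.apache.spark since CommandUtils is private[spark].
{{{
package org.apache.spark.deploy.worker

import org.apache.spark.deploy.Command

// Build, but do not start, the java command line for a hypothetical application.
object BuildProcessBuilderSketch {
  def main(args: Array[String]): Unit = {
    val command = Command(
      mainClass = "org.apache.spark.examples.SparkPi",
      arguments = Seq("{{WORKER_URL}}", "100"),
      environment = Map("SPARK_LOG_DIR" -> "/tmp/spark-logs"),
      classPathEntries = Seq.empty,
      libraryPathEntries = Seq.empty,
      javaOpts = Seq("-Dspark.test.dummy=true"))

    val builder = CommandUtils.buildProcessBuilder(
      command,
      memory = 512,                                // used for -Xms/-Xmx, in MB
      sparkHome = sys.env.getOrElse("SPARK_HOME", "."),
      substituteArguments = {
        case "{{WORKER_URL}}" => "spark://worker@localhost:7078"
        case other => other
      })

    println(builder.command())                     // the fully assembled command line
  }
}
}}}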
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
index 662d37871e7a6..28cab36c7b9e2 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
@@ -23,21 +23,23 @@ import scala.collection.JavaConversions._
import scala.collection.Map
import akka.actor.ActorRef
-import com.google.common.base.Charsets
+import com.google.common.base.Charsets.UTF_8
import com.google.common.io.Files
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileUtil, Path}
-import org.apache.spark.Logging
-import org.apache.spark.deploy.{Command, DriverDescription}
+import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.deploy.{Command, DriverDescription, SparkHadoopUtil}
import org.apache.spark.deploy.DeployMessages.DriverStateChanged
import org.apache.spark.deploy.master.DriverState
import org.apache.spark.deploy.master.DriverState.DriverState
/**
* Manages the execution of one driver, including automatically restarting the driver on failure.
+ * This is currently only used in standalone cluster deploy mode.
*/
private[spark] class DriverRunner(
+ val conf: SparkConf,
val driverId: String,
val workDir: File,
val sparkHome: File,
@@ -74,17 +76,9 @@ private[spark] class DriverRunner(
// Make sure user application jar is on the classpath
// TODO: If we add ability to submit multiple jars they should also be added here
- val classPath = driverDesc.command.classPathEntries ++ Seq(s"$localJarFilename")
- val newCommand = Command(
- driverDesc.command.mainClass,
- driverDesc.command.arguments.map(substituteVariables),
- driverDesc.command.environment,
- classPath,
- driverDesc.command.libraryPathEntries,
- driverDesc.command.extraJavaOptions)
- val command = CommandUtils.buildCommandSeq(newCommand, driverDesc.mem,
- sparkHome.getAbsolutePath)
- launchDriver(command, driverDesc.command.environment, driverDir, driverDesc.supervise)
+ val builder = CommandUtils.buildProcessBuilder(driverDesc.command, driverDesc.mem,
+ sparkHome.getAbsolutePath, substituteVariables, Seq(localJarFilename))
+ launchDriver(builder, driverDir, driverDesc.supervise)
}
catch {
case e: Exception => finalException = Some(e)
@@ -143,8 +137,8 @@ private[spark] class DriverRunner(
val jarPath = new Path(driverDesc.jarUrl)
- val emptyConf = new Configuration()
- val jarFileSystem = jarPath.getFileSystem(emptyConf)
+ val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
+ val jarFileSystem = jarPath.getFileSystem(hadoopConf)
val destPath = new File(driverDir.getAbsolutePath, jarPath.getName)
val jarFileName = jarPath.getName
@@ -153,7 +147,7 @@ private[spark] class DriverRunner(
if (!localJarFile.exists()) { // May already exist if running multiple workers on one node
logInfo(s"Copying user jar $jarPath to $destPath")
- FileUtil.copy(jarFileSystem, jarPath, destPath, false, emptyConf)
+ FileUtil.copy(jarFileSystem, jarPath, destPath, false, hadoopConf)
}
if (!localJarFile.exists()) { // Verify copy succeeded
@@ -163,11 +157,8 @@ private[spark] class DriverRunner(
localJarFilename
}
- private def launchDriver(command: Seq[String], envVars: Map[String, String], baseDir: File,
- supervise: Boolean) {
- val builder = new ProcessBuilder(command: _*).directory(baseDir)
- envVars.map{ case(k,v) => builder.environment().put(k, v) }
-
+ private def launchDriver(builder: ProcessBuilder, baseDir: File, supervise: Boolean) {
+ builder.directory(baseDir)
def initialize(process: Process) = {
// Redirect stdout and stderr to files
val stdout = new File(baseDir, "stdout")
@@ -175,8 +166,8 @@ private[spark] class DriverRunner(
val stderr = new File(baseDir, "stderr")
val header = "Launch Command: %s\n%s\n\n".format(
- command.mkString("\"", "\" \"", "\""), "=" * 40)
- Files.append(header, stderr, Charsets.UTF_8)
+ builder.command.mkString("\"", "\" \"", "\""), "=" * 40)
+ Files.append(header, stderr, UTF_8)
CommandUtils.redirectStream(process.getErrorStream, stderr)
}
runCommandWithRetry(ProcessBuilderLike(builder), initialize, supervise)
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
index 467317dd9b44c..8ba6a01bbcb97 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -19,8 +19,10 @@ package org.apache.spark.deploy.worker
import java.io._
+import scala.collection.JavaConversions._
+
import akka.actor.ActorRef
-import com.google.common.base.Charsets
+import com.google.common.base.Charsets.UTF_8
import com.google.common.io.Files
import org.apache.spark.{SparkConf, Logging}
@@ -30,6 +32,7 @@ import org.apache.spark.util.logging.FileAppender
/**
* Manages the execution of one executor process.
+ * This is currently only used in standalone mode.
*/
private[spark] class ExecutorRunner(
val appId: String,
@@ -41,7 +44,7 @@ private[spark] class ExecutorRunner(
val workerId: String,
val host: String,
val sparkHome: File,
- val workDir: File,
+ val executorDir: File,
val workerUrl: String,
val conf: SparkConf,
var state: ExecutorState.Value)
@@ -72,7 +75,7 @@ private[spark] class ExecutorRunner(
}
/**
- * kill executor process, wait for exit and notify worker to update resource status
+ * Kill executor process, wait for exit and notify worker to update resource status.
*
* @param message the exception message which caused the executor's death
*/
@@ -110,39 +113,25 @@ private[spark] class ExecutorRunner(
case "{{EXECUTOR_ID}}" => execId.toString
case "{{HOSTNAME}}" => host
case "{{CORES}}" => cores.toString
+ case "{{APP_ID}}" => appId
case other => other
}
- def getCommandSeq = {
- val command = Command(appDesc.command.mainClass,
- appDesc.command.arguments.map(substituteVariables) ++ Seq(appId), appDesc.command.environment,
- appDesc.command.classPathEntries, appDesc.command.libraryPathEntries,
- appDesc.command.extraJavaOptions)
- CommandUtils.buildCommandSeq(command, memory, sparkHome.getAbsolutePath)
- }
-
/**
* Download and run the executor described in our ApplicationDescription
*/
def fetchAndRunExecutor() {
try {
- // Create the executor's working directory
- val executorDir = new File(workDir, appId + "/" + execId)
- if (!executorDir.mkdirs()) {
- throw new IOException("Failed to create directory " + executorDir)
- }
-
// Launch the process
- val command = getCommandSeq
+ val builder = CommandUtils.buildProcessBuilder(appDesc.command, memory,
+ sparkHome.getAbsolutePath, substituteVariables)
+ val command = builder.command()
logInfo("Launch command: " + command.mkString("\"", "\" \"", "\""))
- val builder = new ProcessBuilder(command: _*).directory(executorDir)
- val env = builder.environment()
- for ((key, value) <- appDesc.command.environment) {
- env.put(key, value)
- }
+
+ builder.directory(executorDir)
// In case we are running this from within the Spark Shell, avoid creating a "scala"
// parent process for the executor command
- env.put("SPARK_LAUNCH_WITH_SCALA", "0")
+ builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0")
process = builder.start()
val header = "Spark Executor Command: %s\n%s\n\n".format(
command.mkString("\"", "\" \"", "\""), "=" * 40)
@@ -152,9 +141,11 @@ private[spark] class ExecutorRunner(
stdoutAppender = FileAppender(process.getInputStream, stdout, conf)
val stderr = new File(executorDir, "stderr")
- Files.write(header, stderr, Charsets.UTF_8)
+ Files.write(header, stderr, UTF_8)
stderrAppender = FileAppender(process.getErrorStream, stderr, conf)
+ state = ExecutorState.RUNNING
+ worker ! ExecutorStateChanged(appId, execId, state, None, None)
// Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
// or with nonzero exit code
val exitCode = process.waitFor()
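The new {{APP_ID}} placeholder joins the existing substitutions, so the app id no longer has to be appended to the argument list by getCommandSeq. A small, self-contained illustration of how such a substitution function behaves; the concrete values are hypothetical and only mirror the cases shown in the hunk above.
{{{
// Hypothetical values; mirrors the substituteVariables cases shown above.
val substituteVariables: String => String = {
  case "{{EXECUTOR_ID}}" => "7"
  case "{{HOSTNAME}}"    => "worker-host.example.com"
  case "{{CORES}}"       => "4"
  case "{{APP_ID}}"      => "app-20141105120000-0001"
  case other             => other   // non-placeholder arguments pass through untouched
}

val arguments = Seq("{{APP_ID}}", "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}")
arguments.map(substituteVariables)
// => Seq("app-20141105120000-0001", "7", "worker-host.example.com", "4")
}}}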
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/StandaloneWorkerShuffleService.scala b/core/src/main/scala/org/apache/spark/deploy/worker/StandaloneWorkerShuffleService.scala
new file mode 100644
index 0000000000000..b9798963bab0a
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/StandaloneWorkerShuffleService.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.worker
+
+import org.apache.spark.{Logging, SparkConf, SecurityManager}
+import org.apache.spark.network.TransportContext
+import org.apache.spark.network.netty.SparkTransportConf
+import org.apache.spark.network.sasl.SaslRpcHandler
+import org.apache.spark.network.server.TransportServer
+import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler
+
+/**
+ * Provides a server from which Executors can read shuffle files (rather than reading directly from
+ * each other), so that shuffle files remain available even when the executors that wrote them are
+ * turned off or killed.
+ *
+ * Optionally requires SASL authentication in order to read. See [[SecurityManager]].
+ */
+private[worker]
+class StandaloneWorkerShuffleService(sparkConf: SparkConf, securityManager: SecurityManager)
+ extends Logging {
+
+ private val enabled = sparkConf.getBoolean("spark.shuffle.service.enabled", false)
+ private val port = sparkConf.getInt("spark.shuffle.service.port", 7337)
+ private val useSasl: Boolean = securityManager.isAuthenticationEnabled()
+
+ private val transportConf = SparkTransportConf.fromSparkConf(sparkConf, numUsableCores = 0)
+ private val blockHandler = new ExternalShuffleBlockHandler(transportConf)
+ private val transportContext: TransportContext = {
+ val handler = if (useSasl) new SaslRpcHandler(blockHandler, securityManager) else blockHandler
+ new TransportContext(transportConf, handler)
+ }
+
+ private var server: TransportServer = _
+
+ /** Starts the external shuffle service if the user has enabled it. */
+ def startIfEnabled() {
+ if (enabled) {
+ require(server == null, "Shuffle server already started")
+ logInfo(s"Starting shuffle service on port $port with useSasl = $useSasl")
+ server = transportContext.createServer(port)
+ }
+ }
+
+ def stop() {
+ if (enabled && server != null) {
+ server.close()
+ server = null
+ }
+ }
+}
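A minimal usage sketch for the new service, assuming code running inside the same package (the class is private[worker]) and a SparkConf/SecurityManager like the ones the Worker already holds; the port shown is just the documented default repeated for clarity.
{{{
import org.apache.spark.{SecurityManager, SparkConf}

val conf = new SparkConf()
  .set("spark.shuffle.service.enabled", "true")   // the service is a no-op without this
  .set("spark.shuffle.service.port", "7337")      // default port, shown explicitly

val shuffleService = new StandaloneWorkerShuffleService(conf, new SecurityManager(conf))
shuffleService.startIfEnabled()   // binds a TransportServer on the configured port
// ... worker runs ...
shuffleService.stop()             // closes the server; safe to call even if it never started
}}}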
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index ce425443051b0..ca262de832e25 100755
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -18,12 +18,16 @@
package org.apache.spark.deploy.worker
import java.io.File
+import java.io.IOException
import java.text.SimpleDateFormat
-import java.util.Date
+import java.util.{UUID, Date}
+import java.util.concurrent.TimeUnit
+import scala.collection.JavaConversions._
import scala.collection.mutable.HashMap
import scala.concurrent.duration._
import scala.language.postfixOps
+import scala.util.Random
import akka.actor._
import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}
@@ -34,7 +38,7 @@ import org.apache.spark.deploy.DeployMessages._
import org.apache.spark.deploy.master.{DriverState, Master}
import org.apache.spark.deploy.worker.ui.WorkerWebUI
import org.apache.spark.metrics.MetricsSystem
-import org.apache.spark.util.{AkkaUtils, SignalLogger, Utils}
+import org.apache.spark.util.{ActorLogReceive, AkkaUtils, SignalLogger, Utils}
/**
* @param masterUrls Each url should look like spark://host:port.
@@ -51,7 +55,7 @@ private[spark] class Worker(
workDirPath: String = null,
val conf: SparkConf,
val securityMgr: SecurityManager)
- extends Actor with Logging {
+ extends Actor with ActorLogReceive with Logging {
import context.dispatcher
Utils.checkHost(host, "Expected hostname")
@@ -62,8 +66,22 @@ private[spark] class Worker(
// Send a heartbeat every (heartbeat timeout) / 4 milliseconds
val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4
- val REGISTRATION_TIMEOUT = 20.seconds
- val REGISTRATION_RETRIES = 3
+ // Model retries to connect to the master after Hadoop's retry model.
+ // The first six reconnection attempts are made at shorter intervals (between 5 and 15 seconds);
+ // the next 10 attempts are made at longer intervals (between 30 and 90 seconds).
+ // A bit of randomness is introduced so that not all of the workers attempt to reconnect at
+ // the same time.
+ val INITIAL_REGISTRATION_RETRIES = 6
+ val TOTAL_REGISTRATION_RETRIES = INITIAL_REGISTRATION_RETRIES + 10
+ val FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND = 0.500
+ val REGISTRATION_RETRY_FUZZ_MULTIPLIER = {
+ val randomNumberGenerator = new Random(UUID.randomUUID.getMostSignificantBits)
+ randomNumberGenerator.nextDouble + FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND
+ }
+ val INITIAL_REGISTRATION_RETRY_INTERVAL = (math.round(10 *
+ REGISTRATION_RETRY_FUZZ_MULTIPLIER)).seconds
+ val PROLONGED_REGISTRATION_RETRY_INTERVAL = (math.round(60
+ * REGISTRATION_RETRY_FUZZ_MULTIPLIER)).seconds
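To make the interval arithmetic above concrete, a short illustration follows; the drawn multiplier is only an example value.
{{{
// nextDouble is in [0.0, 1.0), so the fuzz multiplier lands in [0.5, 1.5).
val fuzz = new scala.util.Random().nextDouble + 0.5   // e.g. 0.8
math.round(10 * fuzz)   // initial retry interval: 5 to 15 seconds (e.g. 8)
math.round(60 * fuzz)   // prolonged retry interval: 30 to 90 seconds (e.g. 48)
}}}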
val CLEANUP_ENABLED = conf.getBoolean("spark.worker.cleanup.enabled", false)
// How often worker will clean up old app folders
@@ -71,8 +89,7 @@ private[spark] class Worker(
// TTL for app folders/data; after TTL expires it will be cleaned up
val APP_DATA_RETENTION_SECS = conf.getLong("spark.worker.cleanup.appDataTtl", 7 * 24 * 3600)
-
- val masterLock: Object = new Object()
+ val testing: Boolean = sys.props.contains("spark.testing")
var master: ActorSelection = null
var masterAddress: Address = null
var activeMasterUrl: String = ""
@@ -81,13 +98,22 @@ private[spark] class Worker(
@volatile var registered = false
@volatile var connected = false
val workerId = generateWorkerId()
- val sparkHome = new File(Option(System.getenv("SPARK_HOME")).getOrElse("."))
+ val sparkHome =
+ if (testing) {
+ assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!")
+ new File(sys.props("spark.test.home"))
+ } else {
+ new File(sys.env.get("SPARK_HOME").getOrElse("."))
+ }
var workDir: File = null
val executors = new HashMap[String, ExecutorRunner]
val finishedExecutors = new HashMap[String, ExecutorRunner]
val drivers = new HashMap[String, DriverRunner]
val finishedDrivers = new HashMap[String, DriverRunner]
+ // The shuffle service is not actually started unless configured.
+ val shuffleService = new StandaloneWorkerShuffleService(conf, securityMgr)
+
val publicAddress = {
val envVar = System.getenv("SPARK_PUBLIC_DNS")
if (envVar != null) envVar else host
@@ -96,6 +122,7 @@ private[spark] class Worker(
var coresUsed = 0
var memoryUsed = 0
+ var connectionAttemptCount = 0
val metricsSystem = MetricsSystem.createMetricsSystem("worker", conf, securityMgr)
val workerSource = new WorkerSource(this)
@@ -130,7 +157,8 @@ private[spark] class Worker(
logInfo("Spark home: " + sparkHome)
createWorkDir()
context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
- webUi = new WorkerWebUI(this, workDir, Some(webUiPort))
+ shuffleService.startIfEnabled()
+ webUi = new WorkerWebUI(this, workDir, webUiPort)
webUi.bind()
registerWithMaster()
@@ -139,21 +167,19 @@ private[spark] class Worker(
}
def changeMaster(url: String, uiUrl: String) {
- masterLock.synchronized {
- activeMasterUrl = url
- activeMasterWebUiUrl = uiUrl
- master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl))
- masterAddress = activeMasterUrl match {
- case Master.sparkUrlRegex(_host, _port) =>
- Address("akka.tcp", Master.systemName, _host, _port.toInt)
- case x =>
- throw new SparkException("Invalid spark URL: " + x)
- }
- connected = true
+ activeMasterUrl = url
+ activeMasterWebUiUrl = uiUrl
+ master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl))
+ masterAddress = activeMasterUrl match {
+ case Master.sparkUrlRegex(_host, _port) =>
+ Address("akka.tcp", Master.systemName, _host, _port.toInt)
+ case x =>
+ throw new SparkException("Invalid spark URL: " + x)
}
+ connected = true
}
- def tryRegisterAllMasters() {
+ private def tryRegisterAllMasters() {
for (masterUrl <- masterUrls) {
logInfo("Connecting to master " + masterUrl + "...")
val actor = context.actorSelection(Master.toAkkaUrl(masterUrl))
@@ -161,49 +187,82 @@ private[spark] class Worker(
}
}
- def registerWithMaster() {
- tryRegisterAllMasters()
- var retries = 0
- registrationRetryTimer = Some {
- context.system.scheduler.schedule(REGISTRATION_TIMEOUT, REGISTRATION_TIMEOUT) {
- Utils.tryOrExit {
- retries += 1
- if (registered) {
- registrationRetryTimer.foreach(_.cancel())
- } else if (retries >= REGISTRATION_RETRIES) {
- logError("All masters are unresponsive! Giving up.")
- System.exit(1)
- } else {
- tryRegisterAllMasters()
+ private def retryConnectToMaster() {
+ Utils.tryOrExit {
+ connectionAttemptCount += 1
+ if (registered) {
+ registrationRetryTimer.foreach(_.cancel())
+ registrationRetryTimer = None
+ } else if (connectionAttemptCount <= TOTAL_REGISTRATION_RETRIES) {
+ logInfo(s"Retrying connection to master (attempt # $connectionAttemptCount)")
+ tryRegisterAllMasters()
+ if (connectionAttemptCount == INITIAL_REGISTRATION_RETRIES) {
+ registrationRetryTimer.foreach(_.cancel())
+ registrationRetryTimer = Some {
+ context.system.scheduler.schedule(PROLONGED_REGISTRATION_RETRY_INTERVAL,
+ PROLONGED_REGISTRATION_RETRY_INTERVAL)(retryConnectToMaster)
}
}
+ } else {
+ logError("All masters are unresponsive! Giving up.")
+ System.exit(1)
}
}
}
- override def receive = {
+ def registerWithMaster() {
+ // DisassociatedEvent may be triggered multiple times, so don't attempt registration
+ // if there are outstanding registration attempts scheduled.
+ registrationRetryTimer match {
+ case None =>
+ registered = false
+ tryRegisterAllMasters()
+ connectionAttemptCount = 0
+ registrationRetryTimer = Some {
+ context.system.scheduler.schedule(INITIAL_REGISTRATION_RETRY_INTERVAL,
+ INITIAL_REGISTRATION_RETRY_INTERVAL)(retryConnectToMaster)
+ }
+ case Some(_) =>
+ logInfo("Not spawning another attempt to register with the master, since there is an" +
+ " attempt scheduled already.")
+ }
+ }
+
+ override def receiveWithLogging = {
case RegisteredWorker(masterUrl, masterWebUiUrl) =>
logInfo("Successfully registered with master " + masterUrl)
registered = true
changeMaster(masterUrl, masterWebUiUrl)
context.system.scheduler.schedule(0 millis, HEARTBEAT_MILLIS millis, self, SendHeartbeat)
if (CLEANUP_ENABLED) {
+ logInfo(s"Worker cleanup enabled; old application directories will be deleted in: $workDir")
context.system.scheduler.schedule(CLEANUP_INTERVAL_MILLIS millis,
CLEANUP_INTERVAL_MILLIS millis, self, WorkDirCleanup)
}
case SendHeartbeat =>
- masterLock.synchronized {
- if (connected) { master ! Heartbeat(workerId) }
- }
+ if (connected) { master ! Heartbeat(workerId) }
case WorkDirCleanup =>
// Spin up a separate thread (in a future) to do the dir cleanup; don't tie up worker actor
val cleanupFuture = concurrent.future {
- logInfo("Cleaning up oldest application directories in " + workDir + " ...")
- Utils.findOldFiles(workDir, APP_DATA_RETENTION_SECS)
- .foreach(Utils.deleteRecursively)
+ val appDirs = workDir.listFiles()
+ if (appDirs == null) {
+ throw new IOException("ERROR: Failed to list files in " + appDirs)
+ }
+ appDirs.filter { dir =>
+ // The directory is used by an application; check that the application is not running
+ // when cleaning up.
+ val appIdFromDir = dir.getName
+ val isAppStillRunning = executors.values.map(_.appId).contains(appIdFromDir)
+ dir.isDirectory && !isAppStillRunning &&
+ !Utils.doesDirectoryContainAnyNewFiles(dir, APP_DATA_RETENTION_SECS)
+ }.foreach { dir =>
+ logInfo(s"Removing directory: ${dir.getPath}")
+ Utils.deleteRecursively(dir)
+ }
}
+
cleanupFuture onFailure {
case e: Throwable =>
logError("App dir cleanup failed: " + e.getMessage, e)
@@ -226,45 +285,49 @@ private[spark] class Worker(
System.exit(1)
}
+ case ReconnectWorker(masterUrl) =>
+ logInfo(s"Master with url $masterUrl requested this worker to reconnect.")
+ registerWithMaster()
+
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
if (masterUrl != activeMasterUrl) {
logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
} else {
try {
logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
+
+ // Create the executor's working directory
+ val executorDir = new File(workDir, appId + "/" + execId)
+ if (!executorDir.mkdirs()) {
+ throw new IOException("Failed to create directory " + executorDir)
+ }
+
val manager = new ExecutorRunner(appId, execId, appDesc, cores_, memory_,
- self, workerId, host,
- appDesc.sparkHome.map(userSparkHome => new File(userSparkHome)).getOrElse(sparkHome),
- workDir, akkaUrl, conf, ExecutorState.RUNNING)
+ self, workerId, host, sparkHome, executorDir, akkaUrl, conf, ExecutorState.LOADING)
executors(appId + "/" + execId) = manager
manager.start()
coresUsed += cores_
memoryUsed += memory_
- masterLock.synchronized {
- master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
- }
+ master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
} catch {
case e: Exception => {
- logError("Failed to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
+ logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
if (executors.contains(appId + "/" + execId)) {
executors(appId + "/" + execId).kill()
executors -= appId + "/" + execId
}
- masterLock.synchronized {
- master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED, None, None)
- }
+ master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
+ Some(e.toString), None)
}
}
}
case ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
- masterLock.synchronized {
- master ! ExecutorStateChanged(appId, execId, state, message, exitStatus)
- }
+ master ! ExecutorStateChanged(appId, execId, state, message, exitStatus)
val fullId = appId + "/" + execId
if (ExecutorState.isFinished(state)) {
executors.get(fullId) match {
- case Some(executor) =>
+ case Some(executor) =>
logInfo("Executor " + fullId + " finished with state " + state +
message.map(" message " + _).getOrElse("") +
exitStatus.map(" exitStatus " + _).getOrElse(""))
@@ -295,7 +358,7 @@ private[spark] class Worker(
case LaunchDriver(driverId, driverDesc) => {
logInfo(s"Asked to launch driver $driverId")
- val driver = new DriverRunner(driverId, workDir, sparkHome, driverDesc, self, akkaUrl)
+ val driver = new DriverRunner(conf, driverId, workDir, sparkHome, driverDesc, self, akkaUrl)
drivers(driverId) = driver
driver.start()
@@ -326,9 +389,7 @@ private[spark] class Worker(
case _ =>
logDebug(s"Driver $driverId changed state to $state")
}
- masterLock.synchronized {
- master ! DriverStateChanged(driverId, state, exception)
- }
+ master ! DriverStateChanged(driverId, state, exception)
val driver = drivers.remove(driverId).get
finishedDrivers(driverId) = driver
memoryUsed -= driver.driverDesc.mem
@@ -347,9 +408,10 @@ private[spark] class Worker(
}
}
- def masterDisconnected() {
+ private def masterDisconnected() {
logError("Connection to master failed! Waiting for master to reconnect...")
connected = false
+ registerWithMaster()
}
def generateWorkerId(): String = {
@@ -357,9 +419,11 @@ private[spark] class Worker(
}
override def postStop() {
+ metricsSystem.report()
registrationRetryTimer.foreach(_.cancel())
executors.values.foreach(_.kill())
drivers.values.foreach(_.kill())
+ shuffleService.stop()
webUi.stop()
metricsSystem.stop()
}
@@ -368,7 +432,8 @@ private[spark] class Worker(
private[spark] object Worker extends Logging {
def main(argStrings: Array[String]) {
SignalLogger.register(log)
- val args = new WorkerArguments(argStrings)
+ val conf = new SparkConf
+ val args = new WorkerArguments(argStrings, conf)
val (actorSystem, _) = startSystemAndActor(args.host, args.port, args.webUiPort, args.cores,
args.memory, args.masters, args.workDir)
actorSystem.awaitTermination()
@@ -381,7 +446,8 @@ private[spark] object Worker extends Logging {
cores: Int,
memory: Int,
masterUrls: Array[String],
- workDir: String, workerNumber: Option[Int] = None): (ActorSystem, Int) = {
+ workDir: String,
+ workerNumber: Option[Int] = None): (ActorSystem, Int) = {
// The LocalSparkCluster runs multiple local sparkWorkerX actor systems
val conf = new SparkConf
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
index dc5158102054e..019cd70f2a229 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
@@ -20,11 +20,12 @@ package org.apache.spark.deploy.worker
import java.lang.management.ManagementFactory
import org.apache.spark.util.{IntParam, MemoryParam, Utils}
+import org.apache.spark.SparkConf
/**
* Command-line parser for the worker.
*/
-private[spark] class WorkerArguments(args: Array[String]) {
+private[spark] class WorkerArguments(args: Array[String], conf: SparkConf) {
var host = Utils.localHostName()
var port = 0
var webUiPort = 8081
@@ -32,6 +33,7 @@ private[spark] class WorkerArguments(args: Array[String]) {
var memory = inferDefaultMemory()
var masters: Array[String] = null
var workDir: String = null
+ var propertiesFile: String = null
// Check for settings in environment variables
if (System.getenv("SPARK_WORKER_PORT") != null) {
@@ -40,8 +42,8 @@ private[spark] class WorkerArguments(args: Array[String]) {
if (System.getenv("SPARK_WORKER_CORES") != null) {
cores = System.getenv("SPARK_WORKER_CORES").toInt
}
- if (System.getenv("SPARK_WORKER_MEMORY") != null) {
- memory = Utils.memoryStringToMb(System.getenv("SPARK_WORKER_MEMORY"))
+ if (conf.getenv("SPARK_WORKER_MEMORY") != null) {
+ memory = Utils.memoryStringToMb(conf.getenv("SPARK_WORKER_MEMORY"))
}
if (System.getenv("SPARK_WORKER_WEBUI_PORT") != null) {
webUiPort = System.getenv("SPARK_WORKER_WEBUI_PORT").toInt
@@ -52,6 +54,15 @@ private[spark] class WorkerArguments(args: Array[String]) {
parse(args.toList)
+ // This mutates the SparkConf, so all accesses to it must be made after this line
+ propertiesFile = Utils.loadDefaultSparkProperties(conf, propertiesFile)
+
+ if (conf.contains("spark.worker.ui.port")) {
+ webUiPort = conf.get("spark.worker.ui.port").toInt
+ }
+
+ checkWorkerMemory()
+
def parse(args: List[String]): Unit = args match {
case ("--ip" | "-i") :: value :: tail =>
Utils.checkHost(value, "ip no longer supported, please use hostname " + value)
@@ -83,7 +94,11 @@ private[spark] class WorkerArguments(args: Array[String]) {
webUiPort = value
parse(tail)
- case ("--help" | "-h") :: tail =>
+ case ("--properties-file") :: value :: tail =>
+ propertiesFile = value
+ parse(tail)
+
+ case ("--help") :: tail =>
printUsageAndExit(0)
case value :: tail =>
@@ -118,7 +133,9 @@ private[spark] class WorkerArguments(args: Array[String]) {
" -i HOST, --ip IP Hostname to listen on (deprecated, please use --host or -h)\n" +
" -h HOST, --host HOST Hostname to listen on\n" +
" -p PORT, --port PORT Port to listen on (default: random)\n" +
- " --webui-port PORT Port for web UI (default: 8081)")
+ " --webui-port PORT Port for web UI (default: 8081)\n" +
+ " --properties-file FILE Path to a custom Spark properties file.\n" +
+ " Default is conf/spark-defaults.conf.")
System.exit(exitCode)
}
@@ -149,4 +166,11 @@ private[spark] class WorkerArguments(args: Array[String]) {
// Leave out 1 GB for the operating system, but don't return a negative memory size
math.max(totalMb - 1024, 512)
}
+
+ def checkWorkerMemory(): Unit = {
+ if (memory <= 0) {
+ val message = "Memory can't be 0, missing a M or G on the end of the memory specification?"
+ throw new IllegalStateException(message)
+ }
+ }
}
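A likely trigger for the new checkWorkerMemory guard is a SPARK_WORKER_MEMORY value given without a unit suffix: Utils.memoryStringToMb, used above to parse it, treats a bare number as a byte count and rounds small values down to 0 MB. A short illustration, with return values shown as comments (the helper is private[spark], so this assumes code inside Spark).
{{{
import org.apache.spark.util.Utils

Utils.memoryStringToMb("4g")    // 4096 -- gigabytes converted to megabytes
Utils.memoryStringToMb("512m")  // 512
Utils.memoryStringToMb("512")   // 0 -- no suffix: interpreted as bytes, hence the guard above
}}}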
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala
index b7ddd8c816cbc..df1e01b23b932 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala
@@ -22,8 +22,8 @@ import com.codahale.metrics.{Gauge, MetricRegistry}
import org.apache.spark.metrics.source.Source
private[spark] class WorkerSource(val worker: Worker) extends Source {
- val sourceName = "worker"
- val metricRegistry = new MetricRegistry()
+ override val sourceName = "worker"
+ override val metricRegistry = new MetricRegistry()
metricRegistry.register(MetricRegistry.name("executors"), new Gauge[Int] {
override def getValue: Int = worker.executors.size
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala
index 530c147000904..63a8ac817b618 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala
@@ -22,13 +22,15 @@ import akka.remote.{AssociatedEvent, AssociationErrorEvent, AssociationEvent, Di
import org.apache.spark.Logging
import org.apache.spark.deploy.DeployMessages.SendHeartbeat
+import org.apache.spark.util.ActorLogReceive
/**
* Actor which connects to a worker process and terminates the JVM if the connection is severed.
* Provides fate sharing between a worker and its associated child processes.
*/
-private[spark] class WorkerWatcher(workerUrl: String) extends Actor
- with Logging {
+private[spark] class WorkerWatcher(workerUrl: String)
+ extends Actor with ActorLogReceive with Logging {
+
override def preStart() {
context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
@@ -48,11 +50,11 @@ private[spark] class WorkerWatcher(workerUrl: String) extends Actor
def exitNonZero() = if (isTesting) isShutDown = true else System.exit(-1)
- override def receive = {
+ override def receiveWithLogging = {
case AssociatedEvent(localAddress, remoteAddress, inbound) if isWorker(remoteAddress) =>
logInfo(s"Successfully connected to $workerUrl")
- case AssociationErrorEvent(cause, localAddress, remoteAddress, inbound)
+ case AssociationErrorEvent(cause, localAddress, remoteAddress, inbound, _)
if isWorker(remoteAddress) =>
// These logs may not be seen if the worker (and associated pipe) has died
logError(s"Could not initialize connection to worker $workerUrl. Exiting.")
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala
index b389cb546de6c..ecb358c399819 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala
@@ -17,7 +17,6 @@
package org.apache.spark.deploy.worker.ui
-import java.io.File
import javax.servlet.http.HttpServletRequest
import scala.xml.Node
@@ -25,7 +24,7 @@ import scala.xml.Node
import org.apache.spark.ui.{WebUIPage, UIUtils}
import org.apache.spark.util.Utils
import org.apache.spark.Logging
-import org.apache.spark.util.logging.{FileAppender, RollingFileAppender}
+import org.apache.spark.util.logging.RollingFileAppender
private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with Logging {
private val worker = parent.worker
@@ -64,11 +63,11 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") w
val offset = Option(request.getParameter("offset")).map(_.toLong)
val byteLength = Option(request.getParameter("byteLength")).map(_.toInt).getOrElse(defaultBytes)
- val (logDir, params) = (appId, executorId, driverId) match {
+ val (logDir, params, pageName) = (appId, executorId, driverId) match {
case (Some(a), Some(e), None) =>
- (s"${workDir.getPath}/$a/$e/", s"appId=$a&executorId=$e")
+ (s"${workDir.getPath}/$a/$e/", s"appId=$a&executorId=$e", s"$a/$e")
case (None, None, Some(d)) =>
- (s"${workDir.getPath}/$d/", s"driverId=$d")
+ (s"${workDir.getPath}/$d/", s"driverId=$d", d)
case _ =>
throw new Exception("Request must specify either application or driver identifiers")
}
@@ -120,7 +119,7 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") w